In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
In [2]:
df = pd.read_csv("PEP1.csv")
pd.set_option('display.max_columns',None)
df.head()
Out[2]:
Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape LandContour Utilities LotConfig LandSlope Neighborhood Condition1 Condition2 BldgType HouseStyle OverallQual OverallCond YearBuilt YearRemodAdd RoofStyle RoofMatl Exterior1st Exterior2nd MasVnrType MasVnrArea ExterQual ExterCond Foundation BsmtQual BsmtCond BsmtExposure BsmtFinType1 BsmtFinSF1 BsmtFinType2 BsmtFinSF2 BsmtUnfSF TotalBsmtSF Heating HeatingQC CentralAir Electrical 1stFlrSF 2ndFlrSF LowQualFinSF GrLivArea BsmtFullBath BsmtHalfBath FullBath HalfBath Bedroom Kitchen KitchenQual TotRmsAbvGrd Functional Fireplaces FireplaceQu GarageType GarageYrBlt GarageFinish GarageCars GarageArea GarageQual GarageCond PavedDrive WoodDeckSF OpenPorchSF EnclosedPorch 3SsnPorch ScreenPorch PoolArea PoolQC Fence MiscFeature MiscVal MoSold YrSold SaleType SaleCondition SalePrice
0 1 60 RL 65.0 8450 Pave NaN Reg Lvl AllPub Inside Gtl CollgCr Norm Norm 1Fam 2Story 7 5 2003 2003 Gable CompShg VinylSd VinylSd BrkFace 196.0 Gd TA PConc Gd TA No GLQ 706 Unf 0 150 856 GasA Ex Y SBrkr 856 854 0 1710 1 0 2 1 3 1 Gd 8 Typ 0 NaN Attchd 2003.0 RFn 2 548 TA TA Y 0 61 0 0 0 0 NaN NaN NaN 0 2 2008 WD Normal 208500
1 2 20 RL 80.0 9600 Pave NaN Reg Lvl AllPub FR2 Gtl Veenker Feedr Norm 1Fam 1Story 6 8 1976 1976 Gable CompShg MetalSd MetalSd None 0.0 TA TA CBlock Gd TA Gd ALQ 978 Unf 0 284 1262 GasA Ex Y SBrkr 1262 0 0 1262 0 1 2 0 3 1 TA 6 Typ 1 TA Attchd 1976.0 RFn 2 460 TA TA Y 298 0 0 0 0 0 NaN NaN NaN 0 5 2007 WD Normal 181500
2 3 60 RL 68.0 11250 Pave NaN IR1 Lvl AllPub Inside Gtl CollgCr Norm Norm 1Fam 2Story 7 5 2001 2002 Gable CompShg VinylSd VinylSd BrkFace 162.0 Gd TA PConc Gd TA Mn GLQ 486 Unf 0 434 920 GasA Ex Y SBrkr 920 866 0 1786 1 0 2 1 3 1 Gd 6 Typ 1 TA Attchd 2001.0 RFn 2 608 TA TA Y 0 42 0 0 0 0 NaN NaN NaN 0 9 2008 WD Normal 223500
3 4 70 RL 60.0 9550 Pave NaN IR1 Lvl AllPub Corner Gtl Crawfor Norm Norm 1Fam 2Story 7 5 1915 1970 Gable CompShg Wd Sdng Wd Shng None 0.0 TA TA BrkTil TA Gd No ALQ 216 Unf 0 540 756 GasA Gd Y SBrkr 961 756 0 1717 1 0 1 0 3 1 Gd 7 Typ 1 Gd Detchd 1998.0 Unf 3 642 TA TA Y 0 35 272 0 0 0 NaN NaN NaN 0 2 2006 WD Abnorml 140000
4 5 60 RL 84.0 14260 Pave NaN IR1 Lvl AllPub FR2 Gtl NoRidge Norm Norm 1Fam 2Story 8 5 2000 2000 Gable CompShg VinylSd VinylSd BrkFace 350.0 Gd TA PConc Gd TA Av GLQ 655 Unf 0 490 1145 GasA Ex Y SBrkr 1145 1053 0 2198 1 0 2 1 4 1 Gd 9 Typ 1 TA Attchd 2000.0 RFn 3 836 TA TA Y 192 84 0 0 0 0 NaN NaN NaN 0 12 2008 WD Normal 250000
In [3]:
df.describe(include="all")
Out[3]:
Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape LandContour Utilities LotConfig LandSlope Neighborhood Condition1 Condition2 BldgType HouseStyle OverallQual OverallCond YearBuilt YearRemodAdd RoofStyle RoofMatl Exterior1st Exterior2nd MasVnrType MasVnrArea ExterQual ExterCond Foundation BsmtQual BsmtCond BsmtExposure BsmtFinType1 BsmtFinSF1 BsmtFinType2 BsmtFinSF2 BsmtUnfSF TotalBsmtSF Heating HeatingQC CentralAir Electrical 1stFlrSF 2ndFlrSF LowQualFinSF GrLivArea BsmtFullBath BsmtHalfBath FullBath HalfBath Bedroom Kitchen KitchenQual TotRmsAbvGrd Functional Fireplaces FireplaceQu GarageType GarageYrBlt GarageFinish GarageCars GarageArea GarageQual GarageCond PavedDrive WoodDeckSF OpenPorchSF EnclosedPorch 3SsnPorch ScreenPorch PoolArea PoolQC Fence MiscFeature MiscVal MoSold YrSold SaleType SaleCondition SalePrice
count 1460.000000 1460.000000 1460 1201.000000 1460.000000 1460 91 1460 1460 1460 1460 1460 1460 1460 1460 1460 1460 1460.000000 1460.000000 1460.000000 1460.000000 1460 1460 1460 1460 1452 1452.000000 1460 1460 1460 1423 1423 1422 1423 1460.000000 1422 1460.000000 1460.000000 1460.000000 1460 1460 1460 1459 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 1460 1460.000000 1460 1460.000000 770 1379 1379.000000 1379 1460.000000 1460.000000 1379 1379 1460 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 7 281 54 1460.000000 1460.000000 1460.000000 1460 1460 1460.000000
unique NaN NaN 5 NaN NaN 2 2 4 4 2 5 3 25 9 8 5 8 NaN NaN NaN NaN 6 8 15 16 4 NaN 4 5 6 4 4 4 6 NaN 6 NaN NaN NaN 6 5 2 5 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 4 NaN 7 NaN 5 6 NaN 3 NaN NaN 5 5 3 NaN NaN NaN NaN NaN NaN 3 4 4 NaN NaN NaN 9 6 NaN
top NaN NaN RL NaN NaN Pave Grvl Reg Lvl AllPub Inside Gtl mes Norm Norm 1Fam 1Story NaN NaN NaN NaN Gable CompShg VinylSd VinylSd None NaN TA TA PConc TA TA No Unf NaN Unf NaN NaN NaN GasA Ex Y SBrkr NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN TA NaN Typ NaN Gd Attchd NaN Unf NaN NaN TA TA Y NaN NaN NaN NaN NaN NaN Gd MnPrv Shed NaN NaN NaN WD Normal NaN
freq NaN NaN 1151 NaN NaN 1454 50 925 1311 1459 1052 1382 225 1260 1445 1220 726 NaN NaN NaN NaN 1141 1434 515 504 864 NaN 906 1282 647 649 1311 953 430 NaN 1256 NaN NaN NaN 1428 741 1365 1334 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 735 NaN 1360 NaN 380 870 NaN 605 NaN NaN 1311 1326 1340 NaN NaN NaN NaN NaN NaN 3 157 49 NaN NaN NaN 1267 1198 NaN
mean 730.500000 56.897260 NaN 70.049958 10516.828082 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 6.099315 5.575342 1971.267808 1984.865753 NaN NaN NaN NaN NaN 103.685262 NaN NaN NaN NaN NaN NaN NaN 443.639726 NaN 46.549315 567.240411 1057.429452 NaN NaN NaN NaN 1162.626712 346.992466 5.844521 1515.463699 0.425342 0.057534 1.565068 0.382877 2.866438 1.046575 NaN 6.517808 NaN 0.613014 NaN NaN 1978.506164 NaN 1.767123 472.980137 NaN NaN NaN 94.244521 46.660274 21.954110 3.409589 15.060959 2.758904 NaN NaN NaN 43.489041 6.321918 2007.815753 NaN NaN 180921.195890
std 421.610009 42.300571 NaN 24.284752 9981.264932 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 1.382997 1.112799 30.202904 20.645407 NaN NaN NaN NaN NaN 181.066207 NaN NaN NaN NaN NaN NaN NaN 456.098091 NaN 161.319273 441.866955 438.705324 NaN NaN NaN NaN 386.587738 436.528436 48.623081 525.480383 0.518911 0.238753 0.550916 0.502885 0.815778 0.220338 NaN 1.625393 NaN 0.644666 NaN NaN 24.689725 NaN 0.747315 213.804841 NaN NaN NaN 125.338794 66.256028 61.119149 29.317331 55.757415 40.177307 NaN NaN NaN 496.123024 2.703626 1.328095 NaN NaN 79442.502883
min 1.000000 20.000000 NaN 21.000000 1300.000000 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 1.000000 1.000000 1872.000000 1950.000000 NaN NaN NaN NaN NaN 0.000000 NaN NaN NaN NaN NaN NaN NaN 0.000000 NaN 0.000000 0.000000 0.000000 NaN NaN NaN NaN 334.000000 0.000000 0.000000 334.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 NaN 2.000000 NaN 0.000000 NaN NaN 1900.000000 NaN 0.000000 0.000000 NaN NaN NaN 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 NaN NaN NaN 0.000000 1.000000 2006.000000 NaN NaN 34900.000000
25% 365.750000 20.000000 NaN 59.000000 7553.500000 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 5.000000 5.000000 1954.000000 1967.000000 NaN NaN NaN NaN NaN 0.000000 NaN NaN NaN NaN NaN NaN NaN 0.000000 NaN 0.000000 223.000000 795.750000 NaN NaN NaN NaN 882.000000 0.000000 0.000000 1129.500000 0.000000 0.000000 1.000000 0.000000 2.000000 1.000000 NaN 5.000000 NaN 0.000000 NaN NaN 1961.000000 NaN 1.000000 334.500000 NaN NaN NaN 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 NaN NaN NaN 0.000000 5.000000 2007.000000 NaN NaN 129975.000000
50% 730.500000 50.000000 NaN 69.000000 9478.500000 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 6.000000 5.000000 1973.000000 1994.000000 NaN NaN NaN NaN NaN 0.000000 NaN NaN NaN NaN NaN NaN NaN 383.500000 NaN 0.000000 477.500000 991.500000 NaN NaN NaN NaN 1087.000000 0.000000 0.000000 1464.000000 0.000000 0.000000 2.000000 0.000000 3.000000 1.000000 NaN 6.000000 NaN 1.000000 NaN NaN 1980.000000 NaN 2.000000 480.000000 NaN NaN NaN 0.000000 25.000000 0.000000 0.000000 0.000000 0.000000 NaN NaN NaN 0.000000 6.000000 2008.000000 NaN NaN 163000.000000
75% 1095.250000 70.000000 NaN 80.000000 11601.500000 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 7.000000 6.000000 2000.000000 2004.000000 NaN NaN NaN NaN NaN 166.000000 NaN NaN NaN NaN NaN NaN NaN 712.250000 NaN 0.000000 808.000000 1298.250000 NaN NaN NaN NaN 1391.250000 728.000000 0.000000 1776.750000 1.000000 0.000000 2.000000 1.000000 3.000000 1.000000 NaN 7.000000 NaN 1.000000 NaN NaN 2002.000000 NaN 2.000000 576.000000 NaN NaN NaN 168.000000 68.000000 0.000000 0.000000 0.000000 0.000000 NaN NaN NaN 0.000000 8.000000 2009.000000 NaN NaN 214000.000000
max 1460.000000 190.000000 NaN 313.000000 215245.000000 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 10.000000 9.000000 2010.000000 2010.000000 NaN NaN NaN NaN NaN 1600.000000 NaN NaN NaN NaN NaN NaN NaN 5644.000000 NaN 1474.000000 2336.000000 6110.000000 NaN NaN NaN NaN 4692.000000 2065.000000 572.000000 5642.000000 3.000000 2.000000 3.000000 2.000000 8.000000 3.000000 NaN 14.000000 NaN 3.000000 NaN NaN 2010.000000 NaN 4.000000 1418.000000 NaN NaN NaN 857.000000 547.000000 552.000000 508.000000 480.000000 738.000000 NaN NaN NaN 15500.000000 12.000000 2010.000000 NaN NaN 755000.000000
In [4]:
df.shape
Out[4]:
(1460, 81)
In [5]:
df.columns
Out[5]:
Index(['Id', 'MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'Street',
       'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig',
       'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType',
       'HouseStyle', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd',
       'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType',
       'MasVnrArea', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual',
       'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinSF1',
       'BsmtFinType2', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', 'Heating',
       'HeatingQC', 'CentralAir', 'Electrical', '1stFlrSF', '2ndFlrSF',
       'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath',
       'HalfBath', 'Bedroom', 'Kitchen', 'KitchenQual', 'TotRmsAbvGrd',
       'Functional', 'Fireplaces', 'FireplaceQu', 'GarageType', 'GarageYrBlt',
       'GarageFinish', 'GarageCars', 'GarageArea', 'GarageQual', 'GarageCond',
       'PavedDrive', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch',
       'ScreenPorch', 'PoolArea', 'PoolQC', 'Fence', 'MiscFeature', 'MiscVal',
       'MoSold', 'YrSold', 'SaleType', 'SaleCondition', 'SalePrice'],
      dtype='object')
In [6]:
df.describe().columns
Out[6]:
Index(['Id', 'MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual',
       'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1',
       'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF',
       'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath',
       'HalfBath', 'Bedroom', 'Kitchen', 'TotRmsAbvGrd', 'Fireplaces',
       'GarageYrBlt', 'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF',
       'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'MiscVal',
       'MoSold', 'YrSold', 'SalePrice'],
      dtype='object')
In [7]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 81 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             1460 non-null   int64  
 1   MSSubClass     1460 non-null   int64  
 2   MSZoning       1460 non-null   object 
 3   LotFrontage    1201 non-null   float64
 4   LotArea        1460 non-null   int64  
 5   Street         1460 non-null   object 
 6   Alley          91 non-null     object 
 7   LotShape       1460 non-null   object 
 8   LandContour    1460 non-null   object 
 9   Utilities      1460 non-null   object 
 10  LotConfig      1460 non-null   object 
 11  LandSlope      1460 non-null   object 
 12  Neighborhood   1460 non-null   object 
 13  Condition1     1460 non-null   object 
 14  Condition2     1460 non-null   object 
 15  BldgType       1460 non-null   object 
 16  HouseStyle     1460 non-null   object 
 17  OverallQual    1460 non-null   int64  
 18  OverallCond    1460 non-null   int64  
 19  YearBuilt      1460 non-null   int64  
 20  YearRemodAdd   1460 non-null   int64  
 21  RoofStyle      1460 non-null   object 
 22  RoofMatl       1460 non-null   object 
 23  Exterior1st    1460 non-null   object 
 24  Exterior2nd    1460 non-null   object 
 25  MasVnrType     1452 non-null   object 
 26  MasVnrArea     1452 non-null   float64
 27  ExterQual      1460 non-null   object 
 28  ExterCond      1460 non-null   object 
 29  Foundation     1460 non-null   object 
 30  BsmtQual       1423 non-null   object 
 31  BsmtCond       1423 non-null   object 
 32  BsmtExposure   1422 non-null   object 
 33  BsmtFinType1   1423 non-null   object 
 34  BsmtFinSF1     1460 non-null   int64  
 35  BsmtFinType2   1422 non-null   object 
 36  BsmtFinSF2     1460 non-null   int64  
 37  BsmtUnfSF      1460 non-null   int64  
 38  TotalBsmtSF    1460 non-null   int64  
 39  Heating        1460 non-null   object 
 40  HeatingQC      1460 non-null   object 
 41  CentralAir     1460 non-null   object 
 42  Electrical     1459 non-null   object 
 43  1stFlrSF       1460 non-null   int64  
 44  2ndFlrSF       1460 non-null   int64  
 45  LowQualFinSF   1460 non-null   int64  
 46  GrLivArea      1460 non-null   int64  
 47  BsmtFullBath   1460 non-null   int64  
 48  BsmtHalfBath   1460 non-null   int64  
 49  FullBath       1460 non-null   int64  
 50  HalfBath       1460 non-null   int64  
 51  Bedroom        1460 non-null   int64  
 52  Kitchen        1460 non-null   int64  
 53  KitchenQual    1460 non-null   object 
 54  TotRmsAbvGrd   1460 non-null   int64  
 55  Functional     1460 non-null   object 
 56  Fireplaces     1460 non-null   int64  
 57  FireplaceQu    770 non-null    object 
 58  GarageType     1379 non-null   object 
 59  GarageYrBlt    1379 non-null   float64
 60  GarageFinish   1379 non-null   object 
 61  GarageCars     1460 non-null   int64  
 62  GarageArea     1460 non-null   int64  
 63  GarageQual     1379 non-null   object 
 64  GarageCond     1379 non-null   object 
 65  PavedDrive     1460 non-null   object 
 66  WoodDeckSF     1460 non-null   int64  
 67  OpenPorchSF    1460 non-null   int64  
 68  EnclosedPorch  1460 non-null   int64  
 69  3SsnPorch      1460 non-null   int64  
 70  ScreenPorch    1460 non-null   int64  
 71  PoolArea       1460 non-null   int64  
 72  PoolQC         7 non-null      object 
 73  Fence          281 non-null    object 
 74  MiscFeature    54 non-null     object 
 75  MiscVal        1460 non-null   int64  
 76  MoSold         1460 non-null   int64  
 77  YrSold         1460 non-null   int64  
 78  SaleType       1460 non-null   object 
 79  SaleCondition  1460 non-null   object 
 80  SalePrice      1460 non-null   int64  
dtypes: float64(3), int64(35), object(43)
memory usage: 924.0+ KB
In [8]:
df.size
Out[8]:
118260
In [9]:
pd.set_option('display.max_rows',None)
isnull_columns = df.isna().sum()
isnull_columns = isnull_columns[isnull_columns>0]
isnull_columns
Out[9]:
LotFrontage      259
Alley           1369
MasVnrType         8
MasVnrArea         8
BsmtQual          37
BsmtCond          37
BsmtExposure      38
BsmtFinType1      37
BsmtFinType2      38
Electrical         1
FireplaceQu      690
GarageType        81
GarageYrBlt       81
GarageFinish      81
GarageQual        81
GarageCond        81
PoolQC          1453
Fence           1179
MiscFeature     1406
dtype: int64
In [10]:
isnull_col_re = isnull_columns[isnull_columns>1100]
In [11]:
list = isnull_col_re.index[0:].tolist()
print(list)
['Alley', 'PoolQC', 'Fence', 'MiscFeature']
In [12]:
df = df.drop(columns=list, axis=1)
isnull_columns = df.isna().sum()
isnull_columns = isnull_columns[isnull_columns>0]
isnull_columns
Out[12]:
LotFrontage     259
MasVnrType        8
MasVnrArea        8
BsmtQual         37
BsmtCond         37
BsmtExposure     38
BsmtFinType1     37
BsmtFinType2     38
Electrical        1
FireplaceQu     690
GarageType       81
GarageYrBlt      81
GarageFinish     81
GarageQual       81
GarageCond       81
dtype: int64
In [13]:
df.shape
Out[13]:
(1460, 77)

Spliting Dataframe¶

In [14]:
df_n = df.select_dtypes(exclude = 'object')
df_c = df.select_dtypes(include = 'object')
df_c['Id'] = df['Id']
df_c['SalePrice'] = df['SalePrice']
In [15]:
df_c.head()
Out[15]:
MSZoning Street LotShape LandContour Utilities LotConfig LandSlope Neighborhood Condition1 Condition2 BldgType HouseStyle RoofStyle RoofMatl Exterior1st Exterior2nd MasVnrType ExterQual ExterCond Foundation BsmtQual BsmtCond BsmtExposure BsmtFinType1 BsmtFinType2 Heating HeatingQC CentralAir Electrical KitchenQual Functional FireplaceQu GarageType GarageFinish GarageQual GarageCond PavedDrive SaleType SaleCondition Id SalePrice
0 RL Pave Reg Lvl AllPub Inside Gtl CollgCr Norm Norm 1Fam 2Story Gable CompShg VinylSd VinylSd BrkFace Gd TA PConc Gd TA No GLQ Unf GasA Ex Y SBrkr Gd Typ NaN Attchd RFn TA TA Y WD Normal 1 208500
1 RL Pave Reg Lvl AllPub FR2 Gtl Veenker Feedr Norm 1Fam 1Story Gable CompShg MetalSd MetalSd None TA TA CBlock Gd TA Gd ALQ Unf GasA Ex Y SBrkr TA Typ TA Attchd RFn TA TA Y WD Normal 2 181500
2 RL Pave IR1 Lvl AllPub Inside Gtl CollgCr Norm Norm 1Fam 2Story Gable CompShg VinylSd VinylSd BrkFace Gd TA PConc Gd TA Mn GLQ Unf GasA Ex Y SBrkr Gd Typ TA Attchd RFn TA TA Y WD Normal 3 223500
3 RL Pave IR1 Lvl AllPub Corner Gtl Crawfor Norm Norm 1Fam 2Story Gable CompShg Wd Sdng Wd Shng None TA TA BrkTil TA Gd No ALQ Unf GasA Gd Y SBrkr Gd Typ Gd Detchd Unf TA TA Y WD Abnorml 4 140000
4 RL Pave IR1 Lvl AllPub FR2 Gtl NoRidge Norm Norm 1Fam 2Story Gable CompShg VinylSd VinylSd BrkFace Gd TA PConc Gd TA Av GLQ Unf GasA Ex Y SBrkr Gd Typ TA Attchd RFn TA TA Y WD Normal 5 250000
In [16]:
first_column = df_c.pop('Id')
In [17]:
df_c.insert(0, 'Id', first_column)
df_c.head()
Out[17]:
Id MSZoning Street LotShape LandContour Utilities LotConfig LandSlope Neighborhood Condition1 Condition2 BldgType HouseStyle RoofStyle RoofMatl Exterior1st Exterior2nd MasVnrType ExterQual ExterCond Foundation BsmtQual BsmtCond BsmtExposure BsmtFinType1 BsmtFinType2 Heating HeatingQC CentralAir Electrical KitchenQual Functional FireplaceQu GarageType GarageFinish GarageQual GarageCond PavedDrive SaleType SaleCondition SalePrice
0 1 RL Pave Reg Lvl AllPub Inside Gtl CollgCr Norm Norm 1Fam 2Story Gable CompShg VinylSd VinylSd BrkFace Gd TA PConc Gd TA No GLQ Unf GasA Ex Y SBrkr Gd Typ NaN Attchd RFn TA TA Y WD Normal 208500
1 2 RL Pave Reg Lvl AllPub FR2 Gtl Veenker Feedr Norm 1Fam 1Story Gable CompShg MetalSd MetalSd None TA TA CBlock Gd TA Gd ALQ Unf GasA Ex Y SBrkr TA Typ TA Attchd RFn TA TA Y WD Normal 181500
2 3 RL Pave IR1 Lvl AllPub Inside Gtl CollgCr Norm Norm 1Fam 2Story Gable CompShg VinylSd VinylSd BrkFace Gd TA PConc Gd TA Mn GLQ Unf GasA Ex Y SBrkr Gd Typ TA Attchd RFn TA TA Y WD Normal 223500
3 4 RL Pave IR1 Lvl AllPub Corner Gtl Crawfor Norm Norm 1Fam 2Story Gable CompShg Wd Sdng Wd Shng None TA TA BrkTil TA Gd No ALQ Unf GasA Gd Y SBrkr Gd Typ Gd Detchd Unf TA TA Y WD Abnorml 140000
4 5 RL Pave IR1 Lvl AllPub FR2 Gtl NoRidge Norm Norm 1Fam 2Story Gable CompShg VinylSd VinylSd BrkFace Gd TA PConc Gd TA Av GLQ Unf GasA Ex Y SBrkr Gd Typ TA Attchd RFn TA TA Y WD Normal 250000

EDA of Numerical variables¶

In [18]:
df_n.describe()
Out[18]:
Id MSSubClass LotFrontage LotArea OverallQual OverallCond YearBuilt YearRemodAdd MasVnrArea BsmtFinSF1 BsmtFinSF2 BsmtUnfSF TotalBsmtSF 1stFlrSF 2ndFlrSF LowQualFinSF GrLivArea BsmtFullBath BsmtHalfBath FullBath HalfBath Bedroom Kitchen TotRmsAbvGrd Fireplaces GarageYrBlt GarageCars GarageArea WoodDeckSF OpenPorchSF EnclosedPorch 3SsnPorch ScreenPorch PoolArea MiscVal MoSold YrSold SalePrice
count 1460.000000 1460.000000 1201.000000 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 1452.000000 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 1379.000000 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000
mean 730.500000 56.897260 70.049958 10516.828082 6.099315 5.575342 1971.267808 1984.865753 103.685262 443.639726 46.549315 567.240411 1057.429452 1162.626712 346.992466 5.844521 1515.463699 0.425342 0.057534 1.565068 0.382877 2.866438 1.046575 6.517808 0.613014 1978.506164 1.767123 472.980137 94.244521 46.660274 21.954110 3.409589 15.060959 2.758904 43.489041 6.321918 2007.815753 180921.195890
std 421.610009 42.300571 24.284752 9981.264932 1.382997 1.112799 30.202904 20.645407 181.066207 456.098091 161.319273 441.866955 438.705324 386.587738 436.528436 48.623081 525.480383 0.518911 0.238753 0.550916 0.502885 0.815778 0.220338 1.625393 0.644666 24.689725 0.747315 213.804841 125.338794 66.256028 61.119149 29.317331 55.757415 40.177307 496.123024 2.703626 1.328095 79442.502883
min 1.000000 20.000000 21.000000 1300.000000 1.000000 1.000000 1872.000000 1950.000000 0.000000 0.000000 0.000000 0.000000 0.000000 334.000000 0.000000 0.000000 334.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.000000 0.000000 1900.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 2006.000000 34900.000000
25% 365.750000 20.000000 59.000000 7553.500000 5.000000 5.000000 1954.000000 1967.000000 0.000000 0.000000 0.000000 223.000000 795.750000 882.000000 0.000000 0.000000 1129.500000 0.000000 0.000000 1.000000 0.000000 2.000000 1.000000 5.000000 0.000000 1961.000000 1.000000 334.500000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 5.000000 2007.000000 129975.000000
50% 730.500000 50.000000 69.000000 9478.500000 6.000000 5.000000 1973.000000 1994.000000 0.000000 383.500000 0.000000 477.500000 991.500000 1087.000000 0.000000 0.000000 1464.000000 0.000000 0.000000 2.000000 0.000000 3.000000 1.000000 6.000000 1.000000 1980.000000 2.000000 480.000000 0.000000 25.000000 0.000000 0.000000 0.000000 0.000000 0.000000 6.000000 2008.000000 163000.000000
75% 1095.250000 70.000000 80.000000 11601.500000 7.000000 6.000000 2000.000000 2004.000000 166.000000 712.250000 0.000000 808.000000 1298.250000 1391.250000 728.000000 0.000000 1776.750000 1.000000 0.000000 2.000000 1.000000 3.000000 1.000000 7.000000 1.000000 2002.000000 2.000000 576.000000 168.000000 68.000000 0.000000 0.000000 0.000000 0.000000 0.000000 8.000000 2009.000000 214000.000000
max 1460.000000 190.000000 313.000000 215245.000000 10.000000 9.000000 2010.000000 2010.000000 1600.000000 5644.000000 1474.000000 2336.000000 6110.000000 4692.000000 2065.000000 572.000000 5642.000000 3.000000 2.000000 3.000000 2.000000 8.000000 3.000000 14.000000 3.000000 2010.000000 4.000000 1418.000000 857.000000 547.000000 552.000000 508.000000 480.000000 738.000000 15500.000000 12.000000 2010.000000 755000.000000
In [19]:
df_n.corr()
Out[19]:
Id MSSubClass LotFrontage LotArea OverallQual OverallCond YearBuilt YearRemodAdd MasVnrArea BsmtFinSF1 BsmtFinSF2 BsmtUnfSF TotalBsmtSF 1stFlrSF 2ndFlrSF LowQualFinSF GrLivArea BsmtFullBath BsmtHalfBath FullBath HalfBath Bedroom Kitchen TotRmsAbvGrd Fireplaces GarageYrBlt GarageCars GarageArea WoodDeckSF OpenPorchSF EnclosedPorch 3SsnPorch ScreenPorch PoolArea MiscVal MoSold YrSold SalePrice
Id 1.000000 0.011156 -0.010601 -0.033226 -0.028365 0.012609 -0.012713 -0.021998 -0.050298 -0.005024 -0.005968 -0.007940 -0.015415 0.010496 0.005590 -0.044230 0.008273 0.002289 -0.020155 0.005587 0.006784 0.037719 0.002951 0.027239 -0.019772 0.000072 0.016570 0.017634 -0.029643 -0.000477 0.002889 -0.046635 0.001330 0.057044 -0.006242 0.021172 0.000712 -0.021917
MSSubClass 0.011156 1.000000 -0.386347 -0.139781 0.032628 -0.059316 0.027850 0.040581 0.022936 -0.069836 -0.065649 -0.140759 -0.238518 -0.251758 0.307886 0.046474 0.074853 0.003491 -0.002333 0.131608 0.177354 -0.023438 0.281721 0.040380 -0.045569 0.085072 -0.040110 -0.098672 -0.012579 -0.006100 -0.012037 -0.043825 -0.026030 0.008283 -0.007683 -0.013585 -0.021407 -0.084284
LotFrontage -0.010601 -0.386347 1.000000 0.426095 0.251646 -0.059213 0.123349 0.088866 0.193458 0.233633 0.049900 0.132644 0.392075 0.457181 0.080177 0.038469 0.402797 0.100949 -0.007234 0.198769 0.053532 0.263170 -0.006069 0.352096 0.266639 0.070250 0.285691 0.344997 0.088521 0.151972 0.010700 0.070029 0.041383 0.206167 0.003368 0.011200 0.007450 0.351799
LotArea -0.033226 -0.139781 0.426095 1.000000 0.105806 -0.005636 0.014228 0.013788 0.104160 0.214103 0.111170 -0.002618 0.260833 0.299475 0.050986 0.004779 0.263116 0.158155 0.048046 0.126031 0.014259 0.119690 -0.017784 0.190015 0.271364 -0.024947 0.154871 0.180403 0.171698 0.084774 -0.018340 0.020423 0.043160 0.077672 0.038068 0.001205 -0.014261 0.263843
OverallQual -0.028365 0.032628 0.251646 0.105806 1.000000 -0.091932 0.572323 0.550684 0.411876 0.239666 -0.059119 0.308159 0.537808 0.476224 0.295493 -0.030429 0.593007 0.111098 -0.040150 0.550600 0.273458 0.101676 -0.183882 0.427452 0.396765 0.547766 0.600671 0.562022 0.238923 0.308819 -0.113937 0.030371 0.064886 0.065166 -0.031406 0.070815 -0.027347 0.790982
OverallCond 0.012609 -0.059316 -0.059213 -0.005636 -0.091932 1.000000 -0.375983 0.073741 -0.128101 -0.046231 0.040229 -0.136841 -0.171098 -0.144203 0.028942 0.025494 -0.079686 -0.054942 0.117821 -0.194149 -0.060769 0.012980 -0.087001 -0.057583 -0.023820 -0.324297 -0.185758 -0.151521 -0.003334 -0.032589 0.070356 0.025504 0.054811 -0.001985 0.068777 -0.003511 0.043950 -0.077856
YearBuilt -0.012713 0.027850 0.123349 0.014228 0.572323 -0.375983 1.000000 0.592855 0.315707 0.249503 -0.049107 0.149040 0.391452 0.281986 0.010308 -0.183784 0.199010 0.187599 -0.038162 0.468271 0.242656 -0.070651 -0.174800 0.095589 0.147716 0.825667 0.537850 0.478954 0.224880 0.188686 -0.387268 0.031355 -0.050364 0.004950 -0.034383 0.012398 -0.013618 0.522897
YearRemodAdd -0.021998 0.040581 0.088866 0.013788 0.550684 0.073741 0.592855 1.000000 0.179618 0.128451 -0.067759 0.181133 0.291066 0.240379 0.140024 -0.062419 0.287389 0.119470 -0.012337 0.439046 0.183331 -0.040581 -0.149598 0.191740 0.112581 0.642277 0.420622 0.371600 0.205726 0.226298 -0.193919 0.045286 -0.038740 0.005829 -0.010286 0.021490 0.035743 0.507101
MasVnrArea -0.050298 0.022936 0.193458 0.104160 0.411876 -0.128101 0.315707 0.179618 1.000000 0.264736 -0.072319 0.114442 0.363936 0.344501 0.174561 -0.069071 0.390857 0.085310 0.026673 0.276833 0.201444 0.102821 -0.037610 0.280682 0.249070 0.252691 0.364204 0.373066 0.159718 0.125703 -0.110204 0.018796 0.061466 0.011723 -0.029815 -0.005965 -0.008201 0.477493
BsmtFinSF1 -0.005024 -0.069836 0.233633 0.214103 0.239666 -0.046231 0.249503 0.128451 0.264736 1.000000 -0.050117 -0.495251 0.522396 0.445863 -0.137079 -0.064503 0.208171 0.649212 0.067418 0.058543 0.004262 -0.107355 -0.081007 0.044316 0.260011 0.153484 0.224054 0.296970 0.204306 0.111761 -0.102303 0.026451 0.062021 0.140491 0.003571 -0.015727 0.014359 0.386420
BsmtFinSF2 -0.005968 -0.065649 0.049900 0.111170 -0.059119 0.040229 -0.049107 -0.067759 -0.072319 -0.050117 1.000000 -0.209294 0.104810 0.097117 -0.099260 0.014807 -0.009640 0.158678 0.070948 -0.076444 -0.032148 -0.015728 -0.040751 -0.035227 0.046921 -0.088011 -0.038264 -0.018227 0.067898 0.003093 0.036543 -0.029993 0.088871 0.041709 0.004940 -0.015211 0.031706 -0.011378
BsmtUnfSF -0.007940 -0.140759 0.132644 -0.002618 0.308159 -0.136841 0.149040 0.181133 0.114442 -0.495251 -0.209294 1.000000 0.415360 0.317987 0.004469 0.028167 0.240257 -0.422900 -0.095804 0.288886 -0.041118 0.166643 0.030086 0.250647 0.051575 0.190708 0.214175 0.183303 -0.005316 0.129005 -0.002538 0.020764 -0.012579 -0.035092 -0.023837 0.034888 -0.041258 0.214479
TotalBsmtSF -0.015415 -0.238518 0.392075 0.260833 0.537808 -0.171098 0.391452 0.291066 0.363936 0.522396 0.104810 0.415360 1.000000 0.819530 -0.174512 -0.033245 0.454868 0.307351 -0.000315 0.323722 -0.048804 0.050450 -0.068901 0.285573 0.339519 0.322445 0.434585 0.486665 0.232019 0.247264 -0.095478 0.037384 0.084489 0.126053 -0.018479 0.013196 -0.014969 0.613581
1stFlrSF 0.010496 -0.251758 0.457181 0.299475 0.476224 -0.144203 0.281986 0.240379 0.344501 0.445863 0.097117 0.317987 0.819530 1.000000 -0.202646 -0.014241 0.566024 0.244671 0.001956 0.380637 -0.119916 0.127401 0.068101 0.409516 0.410531 0.233449 0.439317 0.489782 0.235459 0.211671 -0.065292 0.056104 0.088758 0.131525 -0.021096 0.031372 -0.013604 0.605852
2ndFlrSF 0.005590 0.307886 0.080177 0.050986 0.295493 0.028942 0.010308 0.140024 0.174561 -0.137079 -0.099260 0.004469 -0.174512 -0.202646 1.000000 0.063353 0.687501 -0.169494 -0.023855 0.421378 0.609707 0.502901 0.059306 0.616423 0.194561 0.070832 0.183926 0.138347 0.092165 0.208026 0.061989 -0.024358 0.040606 0.081487 0.016197 0.035164 -0.028700 0.319334
LowQualFinSF -0.044230 0.046474 0.038469 0.004779 -0.030429 0.025494 -0.183784 -0.062419 -0.069071 -0.064503 0.014807 0.028167 -0.033245 -0.014241 0.063353 1.000000 0.134683 -0.047143 -0.005842 -0.000710 -0.027080 0.105607 0.007522 0.131185 -0.021272 -0.036363 -0.094480 -0.067601 -0.025444 0.018251 0.061081 -0.004296 0.026799 0.062157 -0.003793 -0.022174 -0.028921 -0.025606
GrLivArea 0.008273 0.074853 0.402797 0.263116 0.593007 -0.079686 0.199010 0.287389 0.390857 0.208171 -0.009640 0.240257 0.454868 0.566024 0.687501 0.134683 1.000000 0.034836 -0.018918 0.630012 0.415772 0.521270 0.100063 0.825489 0.461679 0.231197 0.467247 0.468997 0.247433 0.330224 0.009113 0.020643 0.101510 0.170205 -0.002416 0.050240 -0.036526 0.708624
BsmtFullBath 0.002289 0.003491 0.100949 0.158155 0.111098 -0.054942 0.187599 0.119470 0.085310 0.649212 0.158678 -0.422900 0.307351 0.244671 -0.169494 -0.047143 0.034836 1.000000 -0.147871 -0.064512 -0.030905 -0.150673 -0.041503 -0.053275 0.137928 0.124553 0.131881 0.179189 0.175315 0.067341 -0.049911 -0.000106 0.023148 0.067616 -0.023047 -0.025361 0.067049 0.227122
BsmtHalfBath -0.020155 -0.002333 -0.007234 0.048046 -0.040150 0.117821 -0.038162 -0.012337 0.026673 0.067418 0.070948 -0.095804 -0.000315 0.001956 -0.023855 -0.005842 -0.018918 -0.147871 1.000000 -0.054536 -0.012340 0.046519 -0.037944 -0.023836 0.028976 -0.077464 -0.020891 -0.024536 0.040161 -0.025324 -0.008555 0.035114 0.032121 0.020025 -0.007367 0.032873 -0.046524 -0.016844
FullBath 0.005587 0.131608 0.198769 0.126031 0.550600 -0.194149 0.468271 0.439046 0.276833 0.058543 -0.076444 0.288886 0.323722 0.380637 0.421378 -0.000710 0.630012 -0.064512 -0.054536 1.000000 0.136381 0.363252 0.133115 0.554784 0.243671 0.484557 0.469672 0.405656 0.187703 0.259977 -0.115093 0.035353 -0.008106 0.049604 -0.014290 0.055872 -0.019669 0.560664
HalfBath 0.006784 0.177354 0.053532 0.014259 0.273458 -0.060769 0.242656 0.183331 0.201444 0.004262 -0.032148 -0.041118 -0.048804 -0.119916 0.609707 -0.027080 0.415772 -0.030905 -0.012340 0.136381 1.000000 0.226651 -0.068263 0.343415 0.203649 0.196785 0.219178 0.163549 0.108080 0.199740 -0.095317 -0.004972 0.072426 0.022381 0.001290 -0.009050 -0.010269 0.284108
Bedroom 0.037719 -0.023438 0.263170 0.119690 0.101676 0.012980 -0.070651 -0.040581 0.102821 -0.107355 -0.015728 0.166643 0.050450 0.127401 0.502901 0.105607 0.521270 -0.150673 0.046519 0.363252 0.226651 1.000000 0.198597 0.676620 0.107570 -0.064518 0.086106 0.065253 0.046854 0.093810 0.041570 -0.024478 0.044300 0.070703 0.007767 0.046544 -0.036014 0.168213
Kitchen 0.002951 0.281721 -0.006069 -0.017784 -0.183882 -0.087001 -0.174800 -0.149598 -0.037610 -0.081007 -0.040751 0.030086 -0.068901 0.068101 0.059306 0.007522 0.100063 -0.041503 -0.037944 0.133115 -0.068263 0.198597 1.000000 0.256045 -0.123936 -0.124411 -0.050634 -0.064433 -0.090130 -0.070091 0.037312 -0.024600 -0.051613 -0.014525 0.062341 0.026589 0.031687 -0.135907
TotRmsAbvGrd 0.027239 0.040380 0.352096 0.190015 0.427452 -0.057583 0.095589 0.191740 0.280682 0.044316 -0.035227 0.250647 0.285573 0.409516 0.616423 0.131185 0.825489 -0.053275 -0.023836 0.554784 0.343415 0.676620 0.256045 1.000000 0.326114 0.148112 0.362289 0.337822 0.165984 0.234192 0.004151 -0.006683 0.059383 0.083757 0.024763 0.036907 -0.034516 0.533723
Fireplaces -0.019772 -0.045569 0.266639 0.271364 0.396765 -0.023820 0.147716 0.112581 0.249070 0.260011 0.046921 0.051575 0.339519 0.410531 0.194561 -0.021272 0.461679 0.137928 0.028976 0.243671 0.203649 0.107570 -0.123936 0.326114 1.000000 0.046822 0.300789 0.269141 0.200019 0.169405 -0.024822 0.011257 0.184530 0.095074 0.001409 0.046357 -0.024096 0.466929
GarageYrBlt 0.000072 0.085072 0.070250 -0.024947 0.547766 -0.324297 0.825667 0.642277 0.252691 0.153484 -0.088011 0.190708 0.322445 0.233449 0.070832 -0.036363 0.231197 0.124553 -0.077464 0.484557 0.196785 -0.064518 -0.124411 0.148112 0.046822 1.000000 0.588920 0.564567 0.224577 0.228425 -0.297003 0.023544 -0.075418 -0.014501 -0.032417 0.005337 -0.001014 0.486362
GarageCars 0.016570 -0.040110 0.285691 0.154871 0.600671 -0.185758 0.537850 0.420622 0.364204 0.224054 -0.038264 0.214175 0.434585 0.439317 0.183926 -0.094480 0.467247 0.131881 -0.020891 0.469672 0.219178 0.086106 -0.050634 0.362289 0.300789 0.588920 1.000000 0.882475 0.226342 0.213569 -0.151434 0.035765 0.050494 0.020934 -0.043080 0.040522 -0.039117 0.640409
GarageArea 0.017634 -0.098672 0.344997 0.180403 0.562022 -0.151521 0.478954 0.371600 0.373066 0.296970 -0.018227 0.183303 0.486665 0.489782 0.138347 -0.067601 0.468997 0.179189 -0.024536 0.405656 0.163549 0.065253 -0.064433 0.337822 0.269141 0.564567 0.882475 1.000000 0.224666 0.241435 -0.121777 0.035087 0.051412 0.061047 -0.027400 0.027974 -0.027378 0.623431
WoodDeckSF -0.029643 -0.012579 0.088521 0.171698 0.238923 -0.003334 0.224880 0.205726 0.159718 0.204306 0.067898 -0.005316 0.232019 0.235459 0.092165 -0.025444 0.247433 0.175315 0.040161 0.187703 0.108080 0.046854 -0.090130 0.165984 0.200019 0.224577 0.226342 0.224666 1.000000 0.058661 -0.125989 -0.032771 -0.074181 0.073378 -0.009551 0.021011 0.022270 0.324413
OpenPorchSF -0.000477 -0.006100 0.151972 0.084774 0.308819 -0.032589 0.188686 0.226298 0.125703 0.111761 0.003093 0.129005 0.247264 0.211671 0.208026 0.018251 0.330224 0.067341 -0.025324 0.259977 0.199740 0.093810 -0.070091 0.234192 0.169405 0.228425 0.213569 0.241435 0.058661 1.000000 -0.093079 -0.005842 0.074304 0.060762 -0.018584 0.071255 -0.057619 0.315856
EnclosedPorch 0.002889 -0.012037 0.010700 -0.018340 -0.113937 0.070356 -0.387268 -0.193919 -0.110204 -0.102303 0.036543 -0.002538 -0.095478 -0.065292 0.061989 0.061081 0.009113 -0.049911 -0.008555 -0.115093 -0.095317 0.041570 0.037312 0.004151 -0.024822 -0.297003 -0.151434 -0.121777 -0.125989 -0.093079 1.000000 -0.037305 -0.082864 0.054203 0.018361 -0.028887 -0.009916 -0.128578
3SsnPorch -0.046635 -0.043825 0.070029 0.020423 0.030371 0.025504 0.031355 0.045286 0.018796 0.026451 -0.029993 0.020764 0.037384 0.056104 -0.024358 -0.004296 0.020643 -0.000106 0.035114 0.035353 -0.004972 -0.024478 -0.024600 -0.006683 0.011257 0.023544 0.035765 0.035087 -0.032771 -0.005842 -0.037305 1.000000 -0.031436 -0.007992 0.000354 0.029474 0.018645 0.044584
ScreenPorch 0.001330 -0.026030 0.041383 0.043160 0.064886 0.054811 -0.050364 -0.038740 0.061466 0.062021 0.088871 -0.012579 0.084489 0.088758 0.040606 0.026799 0.101510 0.023148 0.032121 -0.008106 0.072426 0.044300 -0.051613 0.059383 0.184530 -0.075418 0.050494 0.051412 -0.074181 0.074304 -0.082864 -0.031436 1.000000 0.051307 0.031946 0.023217 0.010694 0.111447
PoolArea 0.057044 0.008283 0.206167 0.077672 0.065166 -0.001985 0.004950 0.005829 0.011723 0.140491 0.041709 -0.035092 0.126053 0.131525 0.081487 0.062157 0.170205 0.067616 0.020025 0.049604 0.022381 0.070703 -0.014525 0.083757 0.095074 -0.014501 0.020934 0.061047 0.073378 0.060762 0.054203 -0.007992 0.051307 1.000000 0.029669 -0.033737 -0.059689 0.092404
MiscVal -0.006242 -0.007683 0.003368 0.038068 -0.031406 0.068777 -0.034383 -0.010286 -0.029815 0.003571 0.004940 -0.023837 -0.018479 -0.021096 0.016197 -0.003793 -0.002416 -0.023047 -0.007367 -0.014290 0.001290 0.007767 0.062341 0.024763 0.001409 -0.032417 -0.043080 -0.027400 -0.009551 -0.018584 0.018361 0.000354 0.031946 0.029669 1.000000 -0.006495 0.004906 -0.021190
MoSold 0.021172 -0.013585 0.011200 0.001205 0.070815 -0.003511 0.012398 0.021490 -0.005965 -0.015727 -0.015211 0.034888 0.013196 0.031372 0.035164 -0.022174 0.050240 -0.025361 0.032873 0.055872 -0.009050 0.046544 0.026589 0.036907 0.046357 0.005337 0.040522 0.027974 0.021011 0.071255 -0.028887 0.029474 0.023217 -0.033737 -0.006495 1.000000 -0.145721 0.046432
YrSold 0.000712 -0.021407 0.007450 -0.014261 -0.027347 0.043950 -0.013618 0.035743 -0.008201 0.014359 0.031706 -0.041258 -0.014969 -0.013604 -0.028700 -0.028921 -0.036526 0.067049 -0.046524 -0.019669 -0.010269 -0.036014 0.031687 -0.034516 -0.024096 -0.001014 -0.039117 -0.027378 0.022270 -0.057619 -0.009916 0.018645 0.010694 -0.059689 0.004906 -0.145721 1.000000 -0.028923
SalePrice -0.021917 -0.084284 0.351799 0.263843 0.790982 -0.077856 0.522897 0.507101 0.477493 0.386420 -0.011378 0.214479 0.613581 0.605852 0.319334 -0.025606 0.708624 0.227122 -0.016844 0.560664 0.284108 0.168213 -0.135907 0.533723 0.466929 0.486362 0.640409 0.623431 0.324413 0.315856 -0.128578 0.044584 0.111447 0.092404 -0.021190 0.046432 -0.028923 1.000000

Correlation in a Heatmap¶

In [20]:
sns.set(rc={'figure.figsize':(30,30)})
sns.heatmap(data=df_n.corr(), square=True, annot=True, fmt='.2g', cmap='summer')
Out[20]:
<AxesSubplot: >

Correllation lower than 0.3¶

In [21]:
df_n_high = df_n.loc[:, df_n.corr().abs()['SalePrice'] > 0.3]
df_n_high['Id'] = df_n['Id']
first_column = df_n_high.pop('Id')
df_n_high.insert(0, 'Id', first_column)
In [22]:
df_n_high.head()
Out[22]:
Id LotFrontage OverallQual YearBuilt YearRemodAdd MasVnrArea BsmtFinSF1 TotalBsmtSF 1stFlrSF 2ndFlrSF GrLivArea FullBath TotRmsAbvGrd Fireplaces GarageYrBlt GarageCars GarageArea WoodDeckSF OpenPorchSF SalePrice
0 1 65.0 7 2003 2003 196.0 706 856 856 854 1710 2 8 0 2003.0 2 548 0 61 208500
1 2 80.0 6 1976 1976 0.0 978 1262 1262 0 1262 2 6 1 1976.0 2 460 298 0 181500
2 3 68.0 7 2001 2002 162.0 486 920 920 866 1786 2 6 1 2001.0 2 608 0 42 223500
3 4 60.0 7 1915 1970 0.0 216 756 961 756 1717 1 7 1 1998.0 3 642 0 35 140000
4 5 84.0 8 2000 2000 350.0 655 1145 1145 1053 2198 2 9 1 2000.0 3 836 192 84 250000
In [23]:
sns.set(rc={'figure.figsize':(30,30)})
sns.heatmap(data=df_n_high.corr(), square=True, annot=True, fmt='.2g',  cmap="summer")
Out[23]:
<AxesSubplot: >
In [24]:
df_n = df_n_high
In [25]:
null_columns = df_n.isna().sum()
null_columns = null_columns[null_columns > 0]
null_columns
Out[25]:
LotFrontage    259
MasVnrArea       8
GarageYrBlt     81
dtype: int64
In [26]:
list = null_columns.index[0:].tolist()
print(list)
['LotFrontage', 'MasVnrArea', 'GarageYrBlt']

Replacing null vales with mean¶

In [27]:
def replaceNan(list, df):
    for i in list:
        print('UPDATING: ' + str(i))
        df[i].fillna(value=df[i].mean().astype('float'), inplace=True)
    return df

df_n = replaceNan(list,df_n)
df_n.head()
UPDATING: LotFrontage
UPDATING: MasVnrArea
UPDATING: GarageYrBlt
Out[27]:
Id LotFrontage OverallQual YearBuilt YearRemodAdd MasVnrArea BsmtFinSF1 TotalBsmtSF 1stFlrSF 2ndFlrSF GrLivArea FullBath TotRmsAbvGrd Fireplaces GarageYrBlt GarageCars GarageArea WoodDeckSF OpenPorchSF SalePrice
0 1 65.0 7 2003 2003 196.0 706 856 856 854 1710 2 8 0 2003.0 2 548 0 61 208500
1 2 80.0 6 1976 1976 0.0 978 1262 1262 0 1262 2 6 1 1976.0 2 460 298 0 181500
2 3 68.0 7 2001 2002 162.0 486 920 920 866 1786 2 6 1 2001.0 2 608 0 42 223500
3 4 60.0 7 1915 1970 0.0 216 756 961 756 1717 1 7 1 1998.0 3 642 0 35 140000
4 5 84.0 8 2000 2000 350.0 655 1145 1145 1053 2198 2 9 1 2000.0 3 836 192 84 250000
In [28]:
df_n.head()
Out[28]:
Id LotFrontage OverallQual YearBuilt YearRemodAdd MasVnrArea BsmtFinSF1 TotalBsmtSF 1stFlrSF 2ndFlrSF GrLivArea FullBath TotRmsAbvGrd Fireplaces GarageYrBlt GarageCars GarageArea WoodDeckSF OpenPorchSF SalePrice
0 1 65.0 7 2003 2003 196.0 706 856 856 854 1710 2 8 0 2003.0 2 548 0 61 208500
1 2 80.0 6 1976 1976 0.0 978 1262 1262 0 1262 2 6 1 1976.0 2 460 298 0 181500
2 3 68.0 7 2001 2002 162.0 486 920 920 866 1786 2 6 1 2001.0 2 608 0 42 223500
3 4 60.0 7 1915 1970 0.0 216 756 961 756 1717 1 7 1 1998.0 3 642 0 35 140000
4 5 84.0 8 2000 2000 350.0 655 1145 1145 1053 2198 2 9 1 2000.0 3 836 192 84 250000
In [29]:
null_columns = df_n.isna().sum()
null_columns = null_columns[null_columns > 0]
null_columns
Out[29]:
Series([], dtype: int64)

Outliers in boxplots¶

In [30]:
def boxplotloop(df, columns):
    for col in columns:
        if df[col].dtype != object:
            sns.set(rc={'figure.figsize':(11.7,8.27)})
            sns.boxplot(df[col])
            plt.show()
        
            
            
boxplotloop(df_n, df_n.describe().columns)

Cleaning up outliers¶

In [31]:
def cleanup_outliers(df,columns):
    
    for col in columns:
        print('Working on column: {}'.format(col))
        if (df[col].dtype != object) :
            q1,q3 = np.percentile(df[col], [25,75])
            iqr = q3-q1
            minv = q1-(1.5*iqr)  
            maxv = q3+(1.5*iqr)  
            med  = df[col].median()
            df[col] = np.where(df[col]>maxv , maxv, df[col]).astype(df[col].dtype)
            df[col] = np.where(df[col]<minv , minv, df[col]).astype(df[col].dtype)
        
    return df
In [32]:
df2_n = cleanup_outliers(df_n, df_n.columns)
Working on column: Id
Working on column: LotFrontage
Working on column: OverallQual
Working on column: YearBuilt
Working on column: YearRemodAdd
Working on column: MasVnrArea
Working on column: BsmtFinSF1
Working on column: TotalBsmtSF
Working on column: 1stFlrSF
Working on column: 2ndFlrSF
Working on column: GrLivArea
Working on column: FullBath
Working on column: TotRmsAbvGrd
Working on column: Fireplaces
Working on column: GarageYrBlt
Working on column: GarageCars
Working on column: GarageArea
Working on column: WoodDeckSF
Working on column: OpenPorchSF
Working on column: SalePrice
In [33]:
boxplotloop(df2_n, df2_n.describe().columns)
In [34]:
df2_n.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 20 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Id            1460 non-null   int64  
 1   LotFrontage   1460 non-null   float64
 2   OverallQual   1460 non-null   int64  
 3   YearBuilt     1460 non-null   int64  
 4   YearRemodAdd  1460 non-null   int64  
 5   MasVnrArea    1460 non-null   float64
 6   BsmtFinSF1    1460 non-null   int64  
 7   TotalBsmtSF   1460 non-null   int64  
 8   1stFlrSF      1460 non-null   int64  
 9   2ndFlrSF      1460 non-null   int64  
 10  GrLivArea     1460 non-null   int64  
 11  FullBath      1460 non-null   int64  
 12  TotRmsAbvGrd  1460 non-null   int64  
 13  Fireplaces    1460 non-null   int64  
 14  GarageYrBlt   1460 non-null   float64
 15  GarageCars    1460 non-null   int64  
 16  GarageArea    1460 non-null   int64  
 17  WoodDeckSF    1460 non-null   int64  
 18  OpenPorchSF   1460 non-null   int64  
 19  SalePrice     1460 non-null   int64  
dtypes: float64(3), int64(17)
memory usage: 228.2 KB
In [35]:
sns.set(rc={'figure.figsize':(30,30)})
color = plt.get_cmap('summer')
color.set_bad('lightblue')
sns.heatmap(data=df2_n.corr(), square=True, annot=True, fmt='.2g',  cmap= color)
Out[35]:
<AxesSubplot: >
In [36]:
df2_n.head(50)
Out[36]:
Id LotFrontage OverallQual YearBuilt YearRemodAdd MasVnrArea BsmtFinSF1 TotalBsmtSF 1stFlrSF 2ndFlrSF GrLivArea FullBath TotRmsAbvGrd Fireplaces GarageYrBlt GarageCars GarageArea WoodDeckSF OpenPorchSF SalePrice
0 1 65.000000 7 2003 2003 196.000 706 856 856 854 1710 2 8 0 2003.000000 2 548 0 61 208500
1 2 80.000000 6 1976 1976 0.000 978 1262 1262 0 1262 2 6 1 1976.000000 2 460 298 0 181500
2 3 68.000000 7 2001 2002 162.000 486 920 920 866 1786 2 6 1 2001.000000 2 608 0 42 223500
3 4 60.000000 7 1915 1970 0.000 216 756 961 756 1717 1 7 1 1998.000000 3 642 0 35 140000
4 5 84.000000 8 2000 2000 350.000 655 1145 1145 1053 2198 2 9 1 2000.000000 3 836 192 84 250000
5 6 85.000000 5 1993 1995 0.000 732 796 796 566 1362 1 5 0 1993.000000 2 480 40 30 143000
6 7 75.000000 8 2004 2005 186.000 1369 1686 1694 0 1694 2 7 1 2004.000000 2 636 255 57 307000
7 8 70.049958 7 1973 1973 240.000 859 1107 1107 983 2090 2 7 2 1973.000000 2 484 235 170 200000
8 9 51.000000 7 1931 1950 0.000 0 952 1022 752 1774 2 8 2 1931.000000 2 468 90 0 129900
9 10 50.000000 5 1939 1950 0.000 851 991 1077 0 1077 1 5 2 1939.000000 1 205 0 4 118000
10 11 70.000000 5 1965 1965 0.000 906 1040 1040 0 1040 1 5 0 1965.000000 1 384 0 0 129500
11 12 85.000000 9 2005 2006 286.000 998 1175 1182 1142 2324 3 10 2 2005.000000 3 736 147 21 340037
12 13 70.049958 5 1962 1962 0.000 737 912 912 0 912 1 4 0 1962.000000 1 352 140 0 144000
13 14 91.000000 7 2006 2007 306.000 0 1494 1494 0 1494 2 7 1 2006.000000 3 840 160 33 279500
14 15 70.049958 6 1960 1960 212.000 733 1253 1253 0 1253 1 5 1 1960.000000 1 352 0 170 157000
15 16 51.000000 7 1929 2001 0.000 0 832 854 0 854 1 5 0 1991.000000 2 576 48 112 132000
16 17 70.049958 6 1970 1970 180.000 578 1004 1004 0 1004 1 5 1 1970.000000 2 480 0 0 149000
17 18 72.000000 4 1967 1967 0.000 0 42 1296 0 1296 2 6 0 1967.000000 2 516 0 0 90000
18 19 66.000000 5 2004 2004 0.000 646 1114 1114 0 1114 1 6 0 2004.000000 2 576 0 102 159000
19 20 70.000000 5 1958 1965 0.000 504 1029 1339 0 1339 1 6 0 1958.000000 1 294 0 0 139000
20 21 101.000000 8 2005 2006 380.000 0 1158 1158 1218 2376 3 9 1 2005.000000 3 853 240 154 325300
21 22 57.000000 7 1930 1950 0.000 0 637 1108 0 1108 1 6 1 1930.000000 1 280 0 0 139400
22 23 75.000000 8 2002 2002 281.000 0 1777 1795 0 1795 2 7 1 2002.000000 2 534 171 159 230000
23 24 44.000000 5 1976 1976 0.000 840 1040 1060 0 1060 1 6 1 1976.000000 2 572 100 110 129900
24 25 70.049958 5 1968 2001 0.000 188 1060 1060 0 1060 1 6 1 1968.000000 1 270 406 90 154000
25 26 107.500000 8 2007 2007 410.625 0 1566 1600 0 1600 2 7 1 2007.000000 3 890 0 56 256300
26 27 60.000000 5 1951 2000 0.000 234 900 900 0 900 1 5 0 2005.000000 2 576 222 32 134800
27 28 98.000000 8 2007 2008 200.000 1218 1704 1704 0 1704 2 7 1 2008.000000 3 772 0 50 306000
28 29 47.000000 5 1957 1997 0.000 1277 1484 1600 0 1600 1 6 2 1957.000000 1 319 288 170 207500
29 30 60.000000 4 1927 1950 0.000 0 520 520 0 520 1 4 0 1920.000000 1 240 49 0 68500
30 31 50.000000 4 1920 1950 0.000 0 649 649 668 1317 1 6 0 1920.000000 1 250 0 54 40000
31 32 70.049958 5 1966 2006 0.000 0 1228 1228 0 1228 1 6 0 1966.000000 1 271 0 65 149350
32 33 85.000000 8 2007 2007 0.000 0 1234 1234 0 1234 2 7 0 2007.000000 2 484 0 30 179900
33 34 70.000000 5 1959 1959 0.000 1018 1398 1700 0 1700 1 6 1 1959.000000 2 447 0 38 165500
34 35 60.000000 9 2005 2005 246.000 1153 1561 1561 0 1561 2 6 1 2005.000000 2 556 203 47 277500
35 36 107.500000 8 2004 2005 132.000 0 1117 1132 1320 2452 3 9 1 2004.000000 3 691 113 32 309000
36 37 107.500000 5 1994 1995 0.000 0 1097 1097 0 1097 1 6 0 1995.000000 2 672 392 64 145000
37 38 74.000000 5 1954 1990 410.625 1213 1297 1297 0 1297 1 5 1 1954.000000 2 498 0 0 153000
38 39 68.000000 5 1953 2007 0.000 731 1057 1057 0 1057 1 5 0 1953.000000 1 246 0 52 109000
39 40 65.000000 4 1955 1955 0.000 0 42 1152 0 1152 2 6 0 1978.506164 0 0 0 0 82000
40 41 84.000000 6 1965 1965 101.000 643 1088 1324 0 1324 2 6 1 1965.000000 2 440 0 138 160000
41 42 107.500000 5 1959 1959 0.000 967 1350 1328 0 1328 1 5 2 1959.000000 1 308 0 104 170000
42 43 70.049958 5 1983 1983 0.000 747 840 884 0 884 1 5 0 1983.000000 2 504 240 0 144000
43 44 70.049958 5 1975 1980 0.000 280 938 938 0 938 1 5 0 1977.000000 1 308 145 0 130250
44 45 70.000000 5 1959 1959 0.000 179 1150 1150 0 1150 1 6 0 1959.000000 1 300 0 0 141000
45 46 61.000000 9 2005 2005 410.625 456 1752 1752 0 1752 2 6 1 2005.000000 2 576 196 82 319900
46 47 48.000000 7 2003 2003 0.000 1351 1434 1518 631 2149 1 6 1 2003.000000 2 670 168 43 239686
47 48 84.000000 8 2006 2006 0.000 24 1656 1656 0 1656 2 7 0 2006.000000 3 826 0 146 249700
48 49 33.000000 4 1920 2008 0.000 0 736 736 716 1452 2 8 0 1978.506164 0 0 0 0 113000
49 50 66.000000 5 1966 1966 0.000 763 955 955 0 955 1 6 0 1966.000000 1 386 0 0 127000
In [37]:
skewValue = df2_n.skew(axis=0, numeric_only=True)
print(skewValue)
Id              0.000000
LotFrontage     0.061947
OverallQual     0.249513
YearBuilt      -0.594020
YearRemodAdd   -0.503562
MasVnrArea      1.281293
BsmtFinSF1      0.739746
TotalBsmtSF     0.237240
1stFlrSF        0.664066
2ndFlrSF        0.800109
GrLivArea       0.593212
FullBath        0.036562
TotRmsAbvGrd    0.366919
Fireplaces      0.550521
GarageYrBlt    -0.666040
GarageCars     -0.418495
GarageArea     -0.069907
WoodDeckSF      1.083673
OpenPorchSF     1.136301
SalePrice       0.802784
dtype: float64
In [38]:
sns.pairplot(df2_n)
Out[38]:
<seaborn.axisgrid.PairGrid at 0x7fcacfd07c70>

EDA of categorical variables¶

In [39]:
df_c.head()
Out[39]:
Id MSZoning Street LotShape LandContour Utilities LotConfig LandSlope Neighborhood Condition1 Condition2 BldgType HouseStyle RoofStyle RoofMatl Exterior1st Exterior2nd MasVnrType ExterQual ExterCond Foundation BsmtQual BsmtCond BsmtExposure BsmtFinType1 BsmtFinType2 Heating HeatingQC CentralAir Electrical KitchenQual Functional FireplaceQu GarageType GarageFinish GarageQual GarageCond PavedDrive SaleType SaleCondition SalePrice
0 1 RL Pave Reg Lvl AllPub Inside Gtl CollgCr Norm Norm 1Fam 2Story Gable CompShg VinylSd VinylSd BrkFace Gd TA PConc Gd TA No GLQ Unf GasA Ex Y SBrkr Gd Typ NaN Attchd RFn TA TA Y WD Normal 208500
1 2 RL Pave Reg Lvl AllPub FR2 Gtl Veenker Feedr Norm 1Fam 1Story Gable CompShg MetalSd MetalSd None TA TA CBlock Gd TA Gd ALQ Unf GasA Ex Y SBrkr TA Typ TA Attchd RFn TA TA Y WD Normal 181500
2 3 RL Pave IR1 Lvl AllPub Inside Gtl CollgCr Norm Norm 1Fam 2Story Gable CompShg VinylSd VinylSd BrkFace Gd TA PConc Gd TA Mn GLQ Unf GasA Ex Y SBrkr Gd Typ TA Attchd RFn TA TA Y WD Normal 223500
3 4 RL Pave IR1 Lvl AllPub Corner Gtl Crawfor Norm Norm 1Fam 2Story Gable CompShg Wd Sdng Wd Shng None TA TA BrkTil TA Gd No ALQ Unf GasA Gd Y SBrkr Gd Typ Gd Detchd Unf TA TA Y WD Abnorml 140000
4 5 RL Pave IR1 Lvl AllPub FR2 Gtl NoRidge Norm Norm 1Fam 2Story Gable CompShg VinylSd VinylSd BrkFace Gd TA PConc Gd TA Av GLQ Unf GasA Ex Y SBrkr Gd Typ TA Attchd RFn TA TA Y WD Normal 250000
In [40]:
df.head()
Out[40]:
Id MSSubClass MSZoning LotFrontage LotArea Street LotShape LandContour Utilities LotConfig LandSlope Neighborhood Condition1 Condition2 BldgType HouseStyle OverallQual OverallCond YearBuilt YearRemodAdd RoofStyle RoofMatl Exterior1st Exterior2nd MasVnrType MasVnrArea ExterQual ExterCond Foundation BsmtQual BsmtCond BsmtExposure BsmtFinType1 BsmtFinSF1 BsmtFinType2 BsmtFinSF2 BsmtUnfSF TotalBsmtSF Heating HeatingQC CentralAir Electrical 1stFlrSF 2ndFlrSF LowQualFinSF GrLivArea BsmtFullBath BsmtHalfBath FullBath HalfBath Bedroom Kitchen KitchenQual TotRmsAbvGrd Functional Fireplaces FireplaceQu GarageType GarageYrBlt GarageFinish GarageCars GarageArea GarageQual GarageCond PavedDrive WoodDeckSF OpenPorchSF EnclosedPorch 3SsnPorch ScreenPorch PoolArea MiscVal MoSold YrSold SaleType SaleCondition SalePrice
0 1 60 RL 65.0 8450 Pave Reg Lvl AllPub Inside Gtl CollgCr Norm Norm 1Fam 2Story 7 5 2003 2003 Gable CompShg VinylSd VinylSd BrkFace 196.0 Gd TA PConc Gd TA No GLQ 706 Unf 0 150 856 GasA Ex Y SBrkr 856 854 0 1710 1 0 2 1 3 1 Gd 8 Typ 0 NaN Attchd 2003.0 RFn 2 548 TA TA Y 0 61 0 0 0 0 0 2 2008 WD Normal 208500
1 2 20 RL 80.0 9600 Pave Reg Lvl AllPub FR2 Gtl Veenker Feedr Norm 1Fam 1Story 6 8 1976 1976 Gable CompShg MetalSd MetalSd None 0.0 TA TA CBlock Gd TA Gd ALQ 978 Unf 0 284 1262 GasA Ex Y SBrkr 1262 0 0 1262 0 1 2 0 3 1 TA 6 Typ 1 TA Attchd 1976.0 RFn 2 460 TA TA Y 298 0 0 0 0 0 0 5 2007 WD Normal 181500
2 3 60 RL 68.0 11250 Pave IR1 Lvl AllPub Inside Gtl CollgCr Norm Norm 1Fam 2Story 7 5 2001 2002 Gable CompShg VinylSd VinylSd BrkFace 162.0 Gd TA PConc Gd TA Mn GLQ 486 Unf 0 434 920 GasA Ex Y SBrkr 920 866 0 1786 1 0 2 1 3 1 Gd 6 Typ 1 TA Attchd 2001.0 RFn 2 608 TA TA Y 0 42 0 0 0 0 0 9 2008 WD Normal 223500
3 4 70 RL 60.0 9550 Pave IR1 Lvl AllPub Corner Gtl Crawfor Norm Norm 1Fam 2Story 7 5 1915 1970 Gable CompShg Wd Sdng Wd Shng None 0.0 TA TA BrkTil TA Gd No ALQ 216 Unf 0 540 756 GasA Gd Y SBrkr 961 756 0 1717 1 0 1 0 3 1 Gd 7 Typ 1 Gd Detchd 1998.0 Unf 3 642 TA TA Y 0 35 272 0 0 0 0 2 2006 WD Abnorml 140000
4 5 60 RL 84.0 14260 Pave IR1 Lvl AllPub FR2 Gtl NoRidge Norm Norm 1Fam 2Story 8 5 2000 2000 Gable CompShg VinylSd VinylSd BrkFace 350.0 Gd TA PConc Gd TA Av GLQ 655 Unf 0 490 1145 GasA Ex Y SBrkr 1145 1053 0 2198 1 0 2 1 4 1 Gd 9 Typ 1 TA Attchd 2000.0 RFn 3 836 TA TA Y 192 84 0 0 0 0 0 12 2008 WD Normal 250000
In [41]:
for col in df_c:
    print('*****  ' + col + '  count= ' + str(len(df_c[col].unique()))  + '  ******')
    print(df_c[col].unique())
*****  Id  count= 1460  ******
[   1    2    3 ... 1458 1459 1460]
*****  MSZoning  count= 5  ******
['RL' 'RM' 'C (all)' 'FV' 'RH']
*****  Street  count= 2  ******
['Pave' 'Grvl']
*****  LotShape  count= 4  ******
['Reg' 'IR1' 'IR2' 'IR3']
*****  LandContour  count= 4  ******
['Lvl' 'Bnk' 'Low' 'HLS']
*****  Utilities  count= 2  ******
['AllPub' 'NoSeWa']
*****  LotConfig  count= 5  ******
['Inside' 'FR2' 'Corner' 'CulDSac' 'FR3']
*****  LandSlope  count= 3  ******
['Gtl' 'Mod' 'Sev']
*****  Neighborhood  count= 25  ******
['CollgCr' 'Veenker' 'Crawfor' 'NoRidge' 'Mitchel' 'Somerst' 'NWAmes'
 'OldTown' 'BrkSide' 'Sawyer' 'NridgHt' 'mes' 'SawyerW' 'IDOTRR' 'MeadowV'
 'Edwards' 'Timber' 'Gilbert' 'StoneBr' 'ClearCr' 'NPkVill' 'Blmngtn'
 'BrDale' 'SWISU' 'Blueste']
*****  Condition1  count= 9  ******
['Norm' 'Feedr' 'PosN' 'Artery' 'RRAe' 'RRNn' 'RRAn' 'PosA' 'RRNe']
*****  Condition2  count= 8  ******
['Norm' 'Artery' 'RRNn' 'Feedr' 'PosN' 'PosA' 'RRAn' 'RRAe']
*****  BldgType  count= 5  ******
['1Fam' '2fmCon' 'Duplex' 'TwnhsE' 'Twnhs']
*****  HouseStyle  count= 8  ******
['2Story' '1Story' '1.5Fin' '1.5Unf' 'SFoyer' 'SLvl' '2.5Unf' '2.5Fin']
*****  RoofStyle  count= 6  ******
['Gable' 'Hip' 'Gambrel' 'Mansard' 'Flat' 'Shed']
*****  RoofMatl  count= 8  ******
['CompShg' 'WdShngl' 'Metal' 'WdShake' 'Membran' 'Tar&Grv' 'Roll'
 'ClyTile']
*****  Exterior1st  count= 15  ******
['VinylSd' 'MetalSd' 'Wd Sdng' 'HdBoard' 'BrkFace' 'WdShing' 'CemntBd'
 'Plywood' 'AsbShng' 'Stucco' 'BrkComm' 'AsphShn' 'Stone' 'ImStucc'
 'CBlock']
*****  Exterior2nd  count= 16  ******
['VinylSd' 'MetalSd' 'Wd Shng' 'HdBoard' 'Plywood' 'Wd Sdng' 'CmentBd'
 'BrkFace' 'Stucco' 'AsbShng' 'Brk Cmn' 'ImStucc' 'AsphShn' 'Stone'
 'Other' 'CBlock']
*****  MasVnrType  count= 5  ******
['BrkFace' 'None' 'Stone' 'BrkCmn' nan]
*****  ExterQual  count= 4  ******
['Gd' 'TA' 'Ex' 'Fa']
*****  ExterCond  count= 5  ******
['TA' 'Gd' 'Fa' 'Po' 'Ex']
*****  Foundation  count= 6  ******
['PConc' 'CBlock' 'BrkTil' 'Wood' 'Slab' 'Stone']
*****  BsmtQual  count= 5  ******
['Gd' 'TA' 'Ex' nan 'Fa']
*****  BsmtCond  count= 5  ******
['TA' 'Gd' nan 'Fa' 'Po']
*****  BsmtExposure  count= 5  ******
['No' 'Gd' 'Mn' 'Av' nan]
*****  BsmtFinType1  count= 7  ******
['GLQ' 'ALQ' 'Unf' 'Rec' 'BLQ' nan 'LwQ']
*****  BsmtFinType2  count= 7  ******
['Unf' 'BLQ' nan 'ALQ' 'Rec' 'LwQ' 'GLQ']
*****  Heating  count= 6  ******
['GasA' 'GasW' 'Grav' 'Wall' 'OthW' 'Floor']
*****  HeatingQC  count= 5  ******
['Ex' 'Gd' 'TA' 'Fa' 'Po']
*****  CentralAir  count= 2  ******
['Y' 'N']
*****  Electrical  count= 6  ******
['SBrkr' 'FuseF' 'FuseA' 'FuseP' 'Mix' nan]
*****  KitchenQual  count= 4  ******
['Gd' 'TA' 'Ex' 'Fa']
*****  Functional  count= 7  ******
['Typ' 'Min1' 'Maj1' 'Min2' 'Mod' 'Maj2' 'Sev']
*****  FireplaceQu  count= 6  ******
[nan 'TA' 'Gd' 'Fa' 'Ex' 'Po']
*****  GarageType  count= 7  ******
['Attchd' 'Detchd' 'BuiltIn' 'CarPort' nan 'Basment' '2Types']
*****  GarageFinish  count= 4  ******
['RFn' 'Unf' 'Fin' nan]
*****  GarageQual  count= 6  ******
['TA' 'Fa' 'Gd' nan 'Ex' 'Po']
*****  GarageCond  count= 6  ******
['TA' 'Fa' nan 'Gd' 'Po' 'Ex']
*****  PavedDrive  count= 3  ******
['Y' 'N' 'P']
*****  SaleType  count= 9  ******
['WD' 'New' 'COD' 'ConLD' 'ConLI' 'CWD' 'ConLw' 'Con' 'Oth']
*****  SaleCondition  count= 6  ******
['Normal' 'Abnorml' 'Partial' 'AdjLand' 'Alloca' 'Family']
*****  SalePrice  count= 663  ******
[208500 181500 223500 140000 250000 143000 307000 200000 129900 118000
 129500 345000 144000 279500 157000 132000 149000  90000 159000 139000
 325300 139400 230000 154000 256300 134800 306000 207500  68500  40000
 149350 179900 165500 277500 309000 145000 153000 109000  82000 160000
 170000 130250 141000 319900 239686 249700 113000 127000 177000 114500
 110000 385000 130000 180500 172500 196500 438780 124900 158000 101000
 202500 219500 317000 180000 226000  80000 225000 244000 185000 144900
 107400  91000 135750 136500 193500 153500 245000 126500 168500 260000
 174000 164500  85000 123600 109900  98600 163500 133900 204750 214000
  94750  83000 128950 205000 178000 118964 198900 169500 100000 115000
 190000 136900 383970 217000 259500 176000 155000 320000 163990 136000
 153900 181000  84500 128000  87000 150000 150750 220000 171000 231500
 166000 204000 125000 105000 222500 122000 372402 235000  79000 109500
 269500 254900 162500 412500 103200 152000 127500 325624 183500 228000
 128500 215000 239000 163000 184000 243000 211000 501837 200100 120000
 475000 173000 135000 153337 286000 315000 192000 148500 311872 104000
 274900 171500 112000 143900 277000  98000 186000 252678 156000 161750
 134450 210000 107000 311500 167240 204900  97000 386250 290000 106000
 192500 148000 403000  94500 128200 216500  89500 185500 194500 318000
 262500 110500 241500 137000  76500 276000 151000  73000 175500 179500
 120500 266000 124500 201000 415298 228500 244600 179200 164700  88000
 153575 233230 135900 131000 167000 142500 175000 158500 267000 149900
 295000 305900  82500 360000 165600 119900 375000 188500 270000 187500
 342643 354000 301000 126175 242000 324000 145250 214500  78000 119000
 284000 207000 228950 377426 202900  87500 140200 151500 157500 437154
 318061  95000 105900 177500 134000 280000 198500 147000 165000 162000
 172400 134432 123000  61000 340000 394432 179000 187750 213500  76000
 240000  81000 191000 426000 106500 129000  67000 241000 245500 164990
 108000 258000 168000 339750  60000 222000 181134 149500 126000 142000
 206300 275000 109008 195400  85400  79900 122500 212000 116000  90350
 555000 162900 199900 119500 188000 256000 161000 263435  62383 188700
 124000 178740 146500 187000 440000 251000 132500 208900 380000 297000
  89471 326000 374000 164000  86000 133000 172785  91300  34900 430000
 226700 289000 208300 164900 202665  96500 402861 265000 234000 106250
 184750 315750 446261 200624 107500  39300 111250 272000 248000 213250
 179665 229000 263000 112500 255500 121500 268000 325000 316600 135960
 142600 224500 118500 146000 131500 181900 253293 369900  79500 185900
 451950 138000 319000 114504 194201 217500 221000 359100 313000 261500
  75500 137500 183200 105500 314813 305000 165150 139900 209500  93000
 264561 274000 370878 143250  98300 205950 350000 145500  97500 197900
 402000 423000 230500 173500 103600 257500 372500 159434 285000 227875
 148800 392000 194700 755000 335000 108480 141500  89000 123500 138500
 196000 312500 361919 213000  55000 302000 254000 179540  52000 102776
 189000 130500 159500 341000 103000 236500 131400  93500 239900 299800
 236000 265979 260400 275500 158900 179400 215200 337000 264132 216837
 538000 134900 102000 395000 221500 175900 187100 161500 233000 107900
 160200 146800 269790 143500 485000 582933 227680 135500 159950 144500
  55993 157900 224900 271000 224000 183000 139500 232600 147400 237000
 139950 174900 133500 189950 250580 248900 169000 200500  66500 303477
 132250 328900 122900 154500 118858 142953 611657 125500 255000 154300
 173733  75000  35311 238000 176500 145900 169990 193000 117500 184900
 253000 239799 244400 150900 197500 172000 116500 214900 178900  37900
  99500 182000 167500  85500 178400 336000 159895 255900 117000 395192
 195000 197000 348000 173900 337500 121600 206000 232000 136905 119200
 227000 203000 213490 194000 287000 293077 310000 119750  84000 315500
 262280 278000 139600 556581  84900 176485 200141 185850 328000 167900
 151400  91500 138800 155900  83500 252000  92900 176432 274725 134500
 184100 133700 118400 212900 163900 259000 239500  94000 424870 174500
 116900 201800 218000 235128 108959 233170 245350 625000 171900 154900
 392500 745000 186700 104900 262000 219210 116050 271900 229456  80500
 137900 367294 101800 138887 265900 248328 465000 186500 169900 171750
 294000 165400 301500  99900 128900 183900 378500 381000 185750  68400
 150500 281000 333168 206900 295493 111000 156500  72500  52500 155835
 108500 283463 410000 156932 144152 216000 274300 466500  58500 237500
 377500 246578 281213 137450 193879 282922 257000 223000 274970 182900
 192140 143750  64500 394617 149700 149300 121000 179600  92000 287090
 266500 142125 147500]
In [42]:
null_columns = df_c.isna().sum()
null_columns = null_columns[null_columns > 0]
null_columns
Out[42]:
MasVnrType        8
BsmtQual         37
BsmtCond         37
BsmtExposure     38
BsmtFinType1     37
BsmtFinType2     38
Electrical        1
FireplaceQu     690
GarageType       81
GarageFinish     81
GarageQual       81
GarageCond       81
dtype: int64

Replacing Nulls with the mode¶

In [43]:
df_clean = df_c.apply(lambda x: x.fillna(x.value_counts().index[0]))
df_clean.head()
Out[43]:
Id MSZoning Street LotShape LandContour Utilities LotConfig LandSlope Neighborhood Condition1 Condition2 BldgType HouseStyle RoofStyle RoofMatl Exterior1st Exterior2nd MasVnrType ExterQual ExterCond Foundation BsmtQual BsmtCond BsmtExposure BsmtFinType1 BsmtFinType2 Heating HeatingQC CentralAir Electrical KitchenQual Functional FireplaceQu GarageType GarageFinish GarageQual GarageCond PavedDrive SaleType SaleCondition SalePrice
0 1 RL Pave Reg Lvl AllPub Inside Gtl CollgCr Norm Norm 1Fam 2Story Gable CompShg VinylSd VinylSd BrkFace Gd TA PConc Gd TA No GLQ Unf GasA Ex Y SBrkr Gd Typ Gd Attchd RFn TA TA Y WD Normal 208500
1 2 RL Pave Reg Lvl AllPub FR2 Gtl Veenker Feedr Norm 1Fam 1Story Gable CompShg MetalSd MetalSd None TA TA CBlock Gd TA Gd ALQ Unf GasA Ex Y SBrkr TA Typ TA Attchd RFn TA TA Y WD Normal 181500
2 3 RL Pave IR1 Lvl AllPub Inside Gtl CollgCr Norm Norm 1Fam 2Story Gable CompShg VinylSd VinylSd BrkFace Gd TA PConc Gd TA Mn GLQ Unf GasA Ex Y SBrkr Gd Typ TA Attchd RFn TA TA Y WD Normal 223500
3 4 RL Pave IR1 Lvl AllPub Corner Gtl Crawfor Norm Norm 1Fam 2Story Gable CompShg Wd Sdng Wd Shng None TA TA BrkTil TA Gd No ALQ Unf GasA Gd Y SBrkr Gd Typ Gd Detchd Unf TA TA Y WD Abnorml 140000
4 5 RL Pave IR1 Lvl AllPub FR2 Gtl NoRidge Norm Norm 1Fam 2Story Gable CompShg VinylSd VinylSd BrkFace Gd TA PConc Gd TA Av GLQ Unf GasA Ex Y SBrkr Gd Typ TA Attchd RFn TA TA Y WD Normal 250000
In [44]:
df_c = df_clean.copy()
null_columns = df_clean.isna().sum()
null_columns = null_columns[null_columns > 0]
null_columns
Out[44]:
Series([], dtype: int64)
In [45]:
df_c.head()
Out[45]:
Id MSZoning Street LotShape LandContour Utilities LotConfig LandSlope Neighborhood Condition1 Condition2 BldgType HouseStyle RoofStyle RoofMatl Exterior1st Exterior2nd MasVnrType ExterQual ExterCond Foundation BsmtQual BsmtCond BsmtExposure BsmtFinType1 BsmtFinType2 Heating HeatingQC CentralAir Electrical KitchenQual Functional FireplaceQu GarageType GarageFinish GarageQual GarageCond PavedDrive SaleType SaleCondition SalePrice
0 1 RL Pave Reg Lvl AllPub Inside Gtl CollgCr Norm Norm 1Fam 2Story Gable CompShg VinylSd VinylSd BrkFace Gd TA PConc Gd TA No GLQ Unf GasA Ex Y SBrkr Gd Typ Gd Attchd RFn TA TA Y WD Normal 208500
1 2 RL Pave Reg Lvl AllPub FR2 Gtl Veenker Feedr Norm 1Fam 1Story Gable CompShg MetalSd MetalSd None TA TA CBlock Gd TA Gd ALQ Unf GasA Ex Y SBrkr TA Typ TA Attchd RFn TA TA Y WD Normal 181500
2 3 RL Pave IR1 Lvl AllPub Inside Gtl CollgCr Norm Norm 1Fam 2Story Gable CompShg VinylSd VinylSd BrkFace Gd TA PConc Gd TA Mn GLQ Unf GasA Ex Y SBrkr Gd Typ TA Attchd RFn TA TA Y WD Normal 223500
3 4 RL Pave IR1 Lvl AllPub Corner Gtl Crawfor Norm Norm 1Fam 2Story Gable CompShg Wd Sdng Wd Shng None TA TA BrkTil TA Gd No ALQ Unf GasA Gd Y SBrkr Gd Typ Gd Detchd Unf TA TA Y WD Abnorml 140000
4 5 RL Pave IR1 Lvl AllPub FR2 Gtl NoRidge Norm Norm 1Fam 2Story Gable CompShg VinylSd VinylSd BrkFace Gd TA PConc Gd TA Av GLQ Unf GasA Ex Y SBrkr Gd Typ TA Attchd RFn TA TA Y WD Normal 250000

Trying to figure out P-Value and Chi Square¶

In [46]:
def histPlotLoop(df, columns):
    for col in columns:
        if (df[col].dtype == object) :
            i = df_c[col].unique()
            i.sort()
            plt.figure(figsize=(16,6))
            ax = sns.countplot(x=col, data=df, order=i)
            ax.set_xticklabels(ax.get_xticklabels(), rotation=40, ha="right", fontsize=8)
            plt.title(col, fontsize=14, fontweight='bold')
            plt.show()
            
            
histPlotLoop(df_c, df_c.columns)
In [49]:
import category_encoders as ce
In [51]:
encoder = ce.ordinal.OrdinalEncoder(return_df = True)  
df_train = encoder.fit_transform(df_c)
In [52]:
df_train.head()
Out[52]:
Id MSZoning Street LotShape LandContour Utilities LotConfig LandSlope Neighborhood Condition1 Condition2 BldgType HouseStyle RoofStyle RoofMatl Exterior1st Exterior2nd MasVnrType ExterQual ExterCond Foundation BsmtQual BsmtCond BsmtExposure BsmtFinType1 BsmtFinType2 Heating HeatingQC CentralAir Electrical KitchenQual Functional FireplaceQu GarageType GarageFinish GarageQual GarageCond PavedDrive SaleType SaleCondition SalePrice
0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 208500
1 2 1 1 1 1 1 2 1 2 2 1 1 2 1 1 2 2 2 2 1 2 1 1 2 2 1 1 1 1 1 2 1 2 1 1 1 1 1 1 1 181500
2 3 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 223500
3 4 1 1 2 1 1 3 1 3 1 1 1 1 1 1 3 3 2 2 1 3 2 2 1 2 1 1 2 1 1 1 1 1 2 2 1 1 1 1 2 140000
4 5 1 1 2 1 1 2 1 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 4 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 250000
In [53]:
df_c = df_train
In [54]:
df_c.corr()
Out[54]:
Id MSZoning Street LotShape LandContour Utilities LotConfig LandSlope Neighborhood Condition1 Condition2 BldgType HouseStyle RoofStyle RoofMatl Exterior1st Exterior2nd MasVnrType ExterQual ExterCond Foundation BsmtQual BsmtCond BsmtExposure BsmtFinType1 BsmtFinType2 Heating HeatingQC CentralAir Electrical KitchenQual Functional FireplaceQu GarageType GarageFinish GarageQual GarageCond PavedDrive SaleType SaleCondition SalePrice
Id 1.000000 -0.011949 -0.008916 -0.024071 0.014769 0.013324 -0.038192 0.005847 -0.013078 -0.010735 -0.016961 0.019230 0.026166 0.015873 0.013375 0.003624 0.015449 0.024866 -0.021844 -0.037865 0.020023 -0.020092 -0.013579 0.031591 0.012783 0.011692 0.061902 0.015001 -0.009821 -0.048029 -0.020927 0.006018 -0.024325 0.009743 -0.002604 -0.014586 0.000987 0.008921 -0.016932 -0.017712 -0.021917
MSZoning -0.011949 1.000000 0.039678 -0.167918 -0.013396 -0.011167 -0.094747 -0.043444 -0.067889 -0.026021 0.016014 0.271750 -0.047199 -0.088537 -0.049864 -0.063732 -0.025637 0.069241 -0.018093 0.017808 0.011181 -0.015100 0.059041 -0.069148 0.015065 -0.083130 0.012889 -0.035643 0.144970 0.084804 -0.030523 -0.029668 -0.117721 0.099439 0.005704 0.039895 0.028618 0.117402 0.058948 0.068705 -0.116047
Street -0.008916 0.039678 1.000000 0.010129 0.097236 -0.001682 0.004458 0.179360 0.053445 0.023914 -0.005881 0.023392 0.025328 0.006881 -0.007749 0.011874 0.015314 -0.013980 0.143047 0.002633 0.021128 -0.003653 -0.017284 0.045421 0.019357 0.041977 -0.008194 0.043211 0.069869 0.021466 0.055746 -0.015465 -0.017230 0.135136 -0.010497 -0.012414 -0.010851 0.011248 0.021171 0.022919 -0.041036
LotShape -0.024071 -0.167918 0.010129 1.000000 0.201047 0.026616 0.308073 0.144248 -0.013295 0.066612 0.052183 -0.134373 -0.067647 0.043063 0.118271 -0.030630 -0.068168 0.034001 -0.111502 -0.047015 -0.158779 -0.117374 -0.010689 0.106951 -0.088367 0.047531 -0.043320 -0.119556 -0.099138 -0.086719 -0.089061 -0.017841 0.102891 -0.045583 0.067334 -0.046539 -0.065380 -0.100666 0.001435 -0.004773 0.267759
LandContour 0.014769 -0.013396 0.097236 0.201047 1.000000 -0.007963 0.021107 0.507203 0.113336 -0.021478 0.012096 -0.042667 -0.004987 0.100625 0.109717 0.034502 0.014566 0.086651 0.075493 0.001553 -0.000145 0.019828 0.009301 0.078411 0.010181 -0.024136 -0.024849 0.028243 0.023605 0.021122 0.037246 0.016088 -0.053251 0.032066 0.055327 -0.030841 -0.022491 0.081031 -0.014537 0.062826 0.092009
Utilities 0.013324 -0.011167 -0.001682 0.026616 -0.007963 1.000000 0.062298 -0.005909 0.029258 -0.008311 -0.002397 -0.010899 0.076219 -0.011462 -0.003158 0.008321 0.004602 -0.033471 0.012733 -0.008842 0.008611 0.010654 -0.007044 -0.016287 -0.030603 0.068449 -0.003339 0.003963 -0.006907 0.091048 -0.026715 -0.006303 -0.012962 0.114822 -0.034307 -0.005059 -0.004422 -0.007368 0.057455 0.019173 -0.014314
LotConfig -0.038192 -0.094747 0.004458 0.308073 0.021107 0.062298 1.000000 0.033673 -0.055593 0.061695 0.018440 -0.106812 0.005696 0.067337 0.112297 0.022409 0.015406 -0.004082 -0.037293 0.008542 0.000844 0.009188 0.034666 0.065806 -0.000638 0.022254 0.004421 -0.018754 -0.026600 -0.038397 -0.046671 0.008194 0.057436 -0.006936 0.029645 0.006178 0.034839 -0.065917 0.012120 -0.013638 0.109106
LandSlope 0.005847 -0.043444 0.179360 0.144248 0.507203 -0.005909 0.033673 1.000000 0.072891 -0.020333 -0.014034 -0.056801 0.018446 0.189081 0.221080 0.110339 0.093730 0.066658 0.092514 0.002675 0.044388 0.002120 0.017945 0.083769 0.018924 0.084262 0.003625 0.057444 0.010849 0.013048 0.029612 0.097882 0.013180 0.063664 0.018310 -0.014761 -0.008288 0.018009 -0.056680 0.017187 0.051152
Neighborhood -0.013078 -0.067889 0.053445 -0.013295 0.113336 0.029258 -0.055593 0.072891 1.000000 0.002747 -0.012207 0.184198 0.060100 0.061029 0.091511 0.102043 0.121032 0.025252 0.176707 -0.018067 0.104070 0.090658 0.075323 -0.081853 0.136957 0.018894 0.000671 0.130972 0.070703 0.070396 0.153756 0.077679 0.011046 0.088574 0.175353 -0.030853 0.006989 0.065998 0.008466 0.007101 -0.143621
Condition1 -0.010735 -0.026021 0.023914 0.066612 -0.021478 -0.008311 0.061695 -0.020333 0.002747 1.000000 0.189172 -0.088630 -0.026940 0.008366 0.046832 0.027841 0.006597 0.014508 0.057713 0.035770 0.057340 0.052677 0.008640 -0.053324 0.047064 0.026240 0.009515 0.048060 0.040365 0.042287 0.050975 -0.003683 -0.012094 0.021200 0.026413 0.006257 0.002639 0.034886 0.003128 -0.019800 -0.044820
Condition2 -0.016961 0.016014 -0.005881 0.052183 0.012096 -0.002397 0.018440 -0.014034 -0.012207 0.189172 1.000000 0.009902 0.025958 0.097614 -0.011044 0.067090 0.037421 0.011156 0.105079 0.098064 0.049946 0.067830 0.028978 -0.018015 -0.012587 0.018532 -0.011678 0.031041 0.035238 -0.006245 0.062163 -0.022040 -0.025387 0.064196 0.008562 0.062199 0.006563 0.054484 0.011960 0.005158 -0.004833
BldgType 0.019230 0.271750 0.023392 -0.134373 -0.042667 -0.010899 -0.106812 -0.056801 0.184198 -0.088630 0.009902 1.000000 -0.058511 -0.069012 -0.024057 0.074220 0.035336 -0.052490 -0.082211 -0.073555 -0.082327 -0.142979 -0.038452 0.014987 -0.127092 -0.008480 -0.004899 0.007516 -0.008399 -0.064550 -0.046908 -0.040279 -0.042824 0.013266 0.016879 -0.048735 -0.056821 -0.064447 0.044289 0.040365 -0.112611
HouseStyle 0.026166 -0.047199 0.025328 -0.067647 -0.004987 0.076219 0.005696 0.018446 0.060100 -0.026940 0.025958 -0.058511 1.000000 0.005490 0.042447 0.121359 0.137919 -0.025943 0.188102 0.083020 0.189020 0.048008 0.034996 0.169969 0.018787 0.055591 0.063811 0.144855 0.061378 0.058579 0.109841 0.036488 -0.016011 0.112935 -0.029816 0.073689 0.065567 0.038707 -0.006154 -0.029049 -0.188688
RoofStyle 0.015873 -0.088537 0.006881 0.043063 0.100625 -0.011462 0.067337 0.189081 0.061029 0.008366 0.097614 -0.069012 0.005490 1.000000 0.509733 0.117708 0.115663 -0.018066 0.090164 0.040420 0.017324 0.117402 0.032465 0.012399 0.007190 0.076391 0.002689 0.007909 0.011614 -0.036031 0.086051 0.074203 0.036268 -0.006125 0.072046 -0.006440 0.050651 0.011743 -0.031824 0.032990 0.159332
RoofMatl 0.013375 -0.049864 -0.007749 0.118271 0.109717 -0.003158 0.112297 0.221080 0.091511 0.046832 -0.011044 -0.024057 0.042447 0.509733 1.000000 0.195398 0.116622 0.047836 0.021100 0.028508 0.030506 -0.025116 0.029506 0.021721 -0.003143 0.053925 0.005356 0.034147 -0.006083 0.005184 0.033353 0.096702 0.047445 0.012190 0.028933 0.024703 0.059823 -0.033947 -0.014712 0.055436 0.035820
Exterior1st 0.003624 -0.063732 0.011874 -0.030630 0.034502 0.008321 0.022409 0.110339 0.102043 0.027841 0.067090 0.074220 0.121359 0.117708 0.195398 1.000000 0.749599 0.001471 0.253995 0.053318 0.289169 0.072958 -0.003168 -0.065751 0.123582 0.164247 0.078936 0.343369 0.110442 0.082794 0.188481 0.062031 0.104904 0.020894 0.014384 0.043935 0.051225 0.058754 -0.015554 0.000631 -0.120586
Exterior2nd 0.015449 -0.025637 0.015314 -0.068168 0.014566 0.004602 0.015406 0.093730 0.121032 0.006597 0.037421 0.035336 0.137919 0.115663 0.116622 0.749599 1.000000 0.035522 0.275078 0.046500 0.339574 0.150843 0.047921 -0.110260 0.176651 0.135464 0.127524 0.341511 0.209144 0.171184 0.209612 0.097594 0.042182 0.061058 0.013905 0.101342 0.085423 0.119255 -0.033452 -0.039999 -0.164716
MasVnrType 0.024866 0.069241 -0.013980 0.034001 0.086651 -0.033471 -0.004082 0.066658 0.025252 0.014508 0.011156 -0.052490 -0.025943 -0.018066 0.047836 0.001471 0.035522 1.000000 0.043245 0.018964 0.034848 0.115350 0.025659 0.023652 0.040224 0.013607 0.022095 -0.018947 0.056008 0.046910 0.083036 0.084426 -0.119612 0.058091 0.040864 0.031918 0.033172 0.082059 0.071044 0.106159 -0.010695
ExterQual -0.021844 -0.018093 0.143047 -0.111502 0.075493 0.012733 -0.037293 0.092514 0.176707 0.057713 0.105079 -0.082211 0.188102 0.090164 0.021100 0.253995 0.275078 0.043245 1.000000 0.153432 0.366565 0.389251 0.107594 -0.149535 0.255477 0.087775 0.062039 0.380002 0.195975 0.183732 0.528378 0.098799 -0.017219 0.135777 0.063070 0.031578 0.053477 0.166263 0.022013 -0.009670 -0.265015
ExterCond -0.037865 0.017808 0.002633 -0.047015 0.001553 -0.008842 0.008542 0.002675 -0.018067 0.035770 0.098064 -0.073555 0.083020 0.040420 0.028508 0.053318 0.046500 0.018964 0.153432 1.000000 0.206618 0.091963 0.212929 -0.065298 0.081530 -0.014319 0.033196 0.055997 0.112829 0.053171 0.098395 0.098871 -0.012548 0.022384 0.003008 0.093184 0.059977 0.139432 0.003909 -0.038491 -0.121706
Foundation 0.020023 0.011181 0.021128 -0.158779 -0.000145 0.008611 0.000844 0.044388 0.104070 0.057340 0.049946 -0.082327 0.189020 0.017324 0.030506 0.289169 0.339574 0.034848 0.366565 0.206618 1.000000 0.258424 0.112632 -0.194213 0.329651 0.045337 0.238561 0.419451 0.327406 0.241477 0.270250 0.178060 -0.043634 0.155160 -0.054610 0.181056 0.144817 0.243846 -0.050491 -0.093201 -0.429678
BsmtQual -0.020092 -0.015100 -0.003653 -0.117374 0.019828 0.010654 0.009188 0.002120 0.090658 0.052677 0.067830 -0.142979 0.048008 0.117402 -0.025116 0.072958 0.150843 0.115350 0.389251 0.091963 0.258424 1.000000 0.144371 -0.087171 0.191781 0.002272 0.020770 0.125190 0.134404 0.193564 0.377461 0.079358 -0.054100 0.096753 0.105632 0.142403 0.140402 0.143237 0.029998 0.057392 -0.004053
BsmtCond -0.013579 0.059041 -0.017284 -0.010689 0.009301 -0.007044 0.034666 0.017945 0.075323 0.008640 0.028978 -0.038452 0.034996 0.032465 0.029506 -0.003168 0.047921 0.025659 0.107594 0.212929 0.112632 0.144371 1.000000 -0.038548 0.096008 -0.001153 0.063961 0.089517 0.179716 0.166612 0.115948 0.140345 -0.046763 0.058767 0.024657 0.069530 0.165657 0.116541 0.060624 -0.004425 -0.082016
BsmtExposure 0.031591 -0.069148 0.045421 0.106951 0.078411 -0.016287 0.065806 0.083769 -0.081853 -0.053324 -0.018015 0.014987 0.169969 0.012399 0.021721 -0.065751 -0.110260 0.023652 -0.149535 -0.065298 -0.194213 -0.087171 -0.038548 1.000000 -0.211143 0.029815 -0.059522 -0.139312 -0.085375 -0.119133 -0.070103 -0.072731 0.003477 -0.039763 0.015157 -0.047842 -0.033896 -0.070336 0.043161 0.094537 0.229632
BsmtFinType1 0.012783 0.015065 0.019357 -0.088367 0.010181 -0.030603 -0.000638 0.018924 0.136957 0.047064 -0.012587 -0.127092 0.018787 0.007190 -0.003143 0.123582 0.176651 0.040224 0.255477 0.081530 0.329651 0.191781 0.096008 -0.211143 1.000000 0.172309 0.066529 0.303916 0.123151 0.190449 0.180232 0.130886 -0.075103 0.102446 -0.051446 0.044489 0.045777 0.114083 -0.017829 -0.067683 -0.360407
BsmtFinType2 0.011692 -0.083130 0.041977 0.047531 -0.024136 0.068449 0.022254 0.084262 0.018894 0.026240 0.018532 -0.008480 0.055591 0.076391 0.053925 0.164247 0.135464 0.013607 0.087775 -0.014319 0.045337 0.002272 -0.001153 0.029815 0.172309 1.000000 -0.019777 0.112530 -0.045441 -0.044401 0.011502 0.073947 0.075770 -0.009329 -0.030182 -0.004602 0.025548 -0.077327 -0.008694 -0.060118 -0.046649
Heating 0.061902 0.012889 -0.008194 -0.043320 -0.024849 -0.003339 0.004421 0.003625 0.000671 0.009515 -0.011678 -0.004899 0.063811 0.002689 0.005356 0.078936 0.127524 0.022095 0.062039 0.033196 0.238561 0.020770 0.063961 -0.059522 0.066529 -0.019777 1.000000 0.217619 0.429067 0.131262 0.123369 0.059807 -0.029643 0.049661 0.008418 0.086007 0.133256 0.141864 0.018388 -0.032650 -0.106673
HeatingQC 0.015001 -0.035643 0.043211 -0.119556 0.028243 0.003963 -0.018754 0.057444 0.130972 0.048060 0.031041 0.007516 0.144855 0.007909 0.034147 0.343369 0.341511 -0.018947 0.380002 0.055997 0.419451 0.125190 0.089517 -0.139312 0.303916 0.112530 0.217619 1.000000 0.306294 0.177931 0.299089 0.059352 -0.009620 0.064946 -0.056186 0.054182 0.090005 0.193215 -0.009051 -0.049721 -0.427649
CentralAir -0.009821 0.144970 0.069869 -0.099138 0.023605 -0.006907 -0.026600 0.010849 0.070703 0.040365 0.035238 -0.008399 0.061378 0.011614 -0.006083 0.110442 0.209144 0.056008 0.195975 0.112829 0.327406 0.134404 0.179716 -0.085375 0.123151 -0.045441 0.429067 0.306294 1.000000 0.294177 0.245980 0.099165 -0.107931 0.116537 0.017410 0.150958 0.180947 0.275660 0.026522 -0.010168 -0.251328
Electrical -0.048029 0.084804 0.021466 -0.086719 0.021122 0.091048 -0.038397 0.013048 0.070396 0.042287 -0.006245 -0.064550 0.058579 -0.036031 0.005184 0.082794 0.171184 0.046910 0.183732 0.053171 0.241477 0.193564 0.166612 -0.119133 0.190449 -0.044401 0.131262 0.177931 0.294177 1.000000 0.219022 0.093060 -0.100106 0.109852 -0.003678 0.138483 0.163950 0.175431 -0.015363 -0.022997 -0.231417
KitchenQual -0.020927 -0.030523 0.055746 -0.089061 0.037246 -0.026715 -0.046671 0.029612 0.153756 0.050975 0.062163 -0.046908 0.109841 0.086051 0.033353 0.188481 0.209612 0.083036 0.528378 0.098395 0.270250 0.377461 0.115948 -0.070103 0.180232 0.011502 0.123369 0.299089 0.245980 0.219022 1.000000 0.087156 -0.000807 0.116516 0.135430 0.082124 0.104778 0.164511 -0.000178 0.033660 -0.114746
Functional 0.006018 -0.029668 -0.015465 -0.017841 0.016088 -0.006303 0.008194 0.097882 0.077679 -0.003683 -0.022040 -0.040279 0.036488 0.074203 0.096702 0.062031 0.097594 0.084426 0.098799 0.098871 0.178060 0.079358 0.140345 -0.072731 0.130886 0.073947 0.059807 0.059352 0.099165 0.093060 0.087156 1.000000 0.040767 0.090222 0.014572 0.072965 0.042658 0.047388 -0.014494 -0.055108 -0.108367
FireplaceQu -0.024325 -0.117721 -0.017230 0.102891 -0.053251 -0.012962 0.057436 0.013180 0.011046 -0.012094 -0.025387 -0.042824 -0.016011 0.036268 0.047445 0.104904 0.042182 -0.119612 -0.017219 -0.012548 -0.043634 -0.054100 -0.046763 0.003477 -0.075103 0.075770 -0.029643 -0.009620 -0.107931 -0.100106 -0.000807 0.040767 1.000000 -0.094168 0.015989 -0.011604 -0.044373 -0.072927 -0.021735 -0.081781 0.165078
GarageType 0.009743 0.099439 0.135136 -0.045583 0.032066 0.114822 -0.006936 0.063664 0.088574 0.021200 0.064196 0.013266 0.112935 -0.006125 0.012190 0.020894 0.061058 0.058091 0.135777 0.022384 0.155160 0.096753 0.058767 -0.039763 0.102446 -0.009329 0.049661 0.064946 0.116537 0.109852 0.116516 0.090222 -0.094168 1.000000 0.100185 0.106045 0.084393 0.061873 -0.000543 0.043612 -0.101786
GarageFinish -0.002604 0.005704 -0.010497 0.067334 0.055327 -0.034307 0.029645 0.018310 0.175353 0.026413 0.008562 0.016879 -0.029816 0.072046 0.028933 0.014384 0.013905 0.040864 0.063070 0.003008 -0.054610 0.105632 0.024657 0.015157 -0.051446 -0.030182 0.008418 -0.056186 0.017410 -0.003678 0.135430 0.014572 0.015989 0.100185 1.000000 -0.003700 0.002641 0.015989 -0.002995 0.055764 0.141148
GarageQual -0.014586 0.039895 -0.012414 -0.046539 -0.030841 -0.005059 0.006178 -0.014761 -0.030853 0.006257 0.062199 -0.048735 0.073689 -0.006440 0.024703 0.043935 0.101342 0.031918 0.031578 0.093184 0.181056 0.142403 0.069530 -0.047842 0.044489 -0.004602 0.086007 0.054182 0.150958 0.138483 0.082124 0.072965 -0.011604 0.106045 -0.003700 1.000000 0.614519 0.120247 -0.041587 -0.006622 -0.055261
GarageCond 0.000987 0.028618 -0.010851 -0.065380 -0.022491 -0.004422 0.034839 -0.008288 0.006989 0.002639 0.006563 -0.056821 0.065567 0.050651 0.059823 0.051225 0.085423 0.033172 0.053477 0.059977 0.144817 0.140402 0.165657 -0.033896 0.045777 0.025548 0.133256 0.090005 0.180947 0.163950 0.104778 0.042658 -0.044373 0.084393 0.002641 0.614519 1.000000 0.172531 -0.021168 -0.013230 -0.111891
PavedDrive 0.008921 0.117402 0.011248 -0.100666 0.081031 -0.007368 -0.065917 0.018009 0.065998 0.034886 0.054484 -0.064447 0.038707 0.011743 -0.033947 0.058754 0.119255 0.082059 0.166263 0.139432 0.243846 0.143237 0.116541 -0.070336 0.114083 -0.077327 0.141864 0.193215 0.275660 0.175431 0.164511 0.047388 -0.072927 0.061873 0.015989 0.120247 0.172531 1.000000 -0.021553 -0.049674 -0.208954
SaleType -0.016932 0.058948 0.021171 0.001435 -0.014537 0.057455 0.012120 -0.056680 0.008466 0.003128 0.011960 0.044289 -0.006154 -0.031824 -0.014712 -0.015554 -0.033452 0.071044 0.022013 0.003909 -0.050491 0.029998 0.060624 0.043161 -0.017829 -0.008694 0.018388 -0.009051 0.026522 -0.015363 -0.000178 -0.014494 -0.021735 -0.000543 -0.002995 -0.041587 -0.021168 -0.021553 1.000000 0.232149 0.072896
SaleCondition -0.017712 0.068705 0.022919 -0.004773 0.062826 0.019173 -0.013638 0.017187 0.007101 -0.019800 0.005158 0.040365 -0.029049 0.032990 0.055436 0.000631 -0.039999 0.106159 -0.009670 -0.038491 -0.093201 0.057392 -0.004425 0.094537 -0.067683 -0.060118 -0.032650 -0.049721 -0.010168 -0.022997 0.033660 -0.055108 -0.081781 0.043612 0.055764 -0.006622 -0.013230 -0.049674 0.232149 1.000000 0.142503
SalePrice -0.021917 -0.116047 -0.041036 0.267759 0.092009 -0.014314 0.109106 0.051152 -0.143621 -0.044820 -0.004833 -0.112611 -0.188688 0.159332 0.035820 -0.120586 -0.164716 -0.010695 -0.265015 -0.121706 -0.429678 -0.004053 -0.082016 0.229632 -0.360407 -0.046649 -0.106673 -0.427649 -0.251328 -0.231417 -0.114746 -0.108367 0.165078 -0.101786 0.141148 -0.055261 -0.111891 -0.208954 0.072896 0.142503 1.000000
In [55]:
sns.set(rc={'figure.figsize':(30,30)})
color = plt.get_cmap('summer') 
color.set_bad('lightblue')
sns.heatmap(data=df_c.corr(), square=True, annot=True, fmt='.2g',  cmap= color)
Out[55]:
<AxesSubplot: >

Correllation lower than 0.3¶

In [56]:
df_c_high = df_c.loc[:, df_c.corr().abs()['SalePrice'] > 0.3]
df_c_high['Id'] = df_c['Id']
first_column = df_c_high.pop('Id')
df_c_high.insert(0, 'Id', first_column)
In [57]:
df_c_high.head()
Out[57]:
Id Foundation BsmtFinType1 HeatingQC SalePrice
0 1 1 1 1 208500
1 2 2 2 1 181500
2 3 1 1 1 223500
3 4 3 2 2 140000
4 5 1 1 1 250000
In [58]:
sns.set(rc={'figure.figsize':(30,30)})
color = plt.get_cmap('summer')
color.set_bad('lightblue')
sns.heatmap(data=df_c_high.corr(), square=True, annot=True, fmt='.2g',  cmap= color)
Out[58]:
<AxesSubplot: >
In [59]:
models = ["Foundation", "BsmtFinType1", "HeatingQC"]
dfChi = df_c[models].copy()
dfChi
Out[59]:
Foundation BsmtFinType1 HeatingQC
0 1 1 1
1 2 2 1
2 1 1 1
3 3 2 2
4 1 1 1
5 4 1 1
6 1 1 1
7 2 2 1
8 3 3 2
9 3 1 1
10 2 4 1
11 1 1 1
12 2 2 3
13 1 3 1
14 2 5 3
15 3 3 1
16 2 2 1
17 5 3 3
18 1 1 1
19 2 6 3
20 1 3 1
21 1 3 1
22 1 3 1
23 1 1 3
24 2 4 1
25 1 3 1
26 2 5 3
27 1 1 1
28 2 5 3
29 3 3 4
30 3 3 3
31 2 3 2
32 1 3 1
33 2 4 2
34 1 1 1
35 1 3 1
36 1 3 1
37 2 4 2
38 2 1 3
39 1 3 3
40 2 4 1
41 2 5 2
42 2 2 2
43 2 6 3
44 2 2 1
45 1 1 1
46 1 1 1
47 1 1 1
48 3 3 2
49 2 5 1
50 1 1 2
51 1 3 3
52 2 6 3
53 2 1 2
54 2 2 3
55 2 5 2
56 1 1 1
57 1 3 1
58 1 3 1
59 2 2 1
60 1 2 1
61 3 3 3
62 1 1 1
63 3 3 2
64 1 1 1
65 1 3 1
66 2 1 3
67 1 1 1
68 2 3 3
69 2 5 3
70 2 2 1
71 2 2 3
72 1 3 2
73 2 2 2
74 2 3 2
75 2 1 3
76 2 2 1
77 2 5 3
78 2 3 3
79 1 3 2
80 2 4 3
81 1 1 1
82 1 1 1
83 2 3 2
84 1 3 2
85 2 3 1
86 1 3 1
87 1 3 1
88 2 3 3
89 1 1 1
90 5 3 3
91 2 4 3
92 3 2 1
93 3 4 1
94 1 1 1
95 1 2 2
96 1 2 1
97 2 4 3
98 3 2 4
99 2 2 4
100 1 2 2
101 2 3 3
102 5 3 3
103 1 3 1
104 1 6 1
105 1 3 1
106 3 3 2
107 2 2 1
108 2 3 3
109 2 5 1
110 2 4 2
111 1 1 1
112 1 1 1
113 2 2 3
114 2 2 1
115 1 1 2
116 2 6 3
117 1 3 2
118 1 1 2
119 1 3 1
120 2 2 1
121 1 3 2
122 2 5 2
123 1 1 1
124 2 3 3
125 2 1 2
126 2 2 3
127 3 6 1
128 2 5 3
129 2 4 3
130 2 4 1
131 1 1 1
132 2 4 2
133 1 1 1
134 2 4 2
135 1 3 2
136 2 5 3
137 2 3 2
138 1 1 1
139 1 1 1
140 2 2 3
141 1 1 1
142 2 4 4
143 1 1 1
144 2 4 3
145 1 3 1
146 3 5 3
147 1 3 1
148 1 1 1
149 3 3 2
150 2 5 3
151 1 1 1
152 2 4 2
153 2 5 2
154 3 3 3
155 3 3 4
156 2 3 3
157 1 3 1
158 1 1 1
159 1 1 1
160 1 5 3
161 1 1 1
162 1 5 1
163 2 3 1
164 3 6 2
165 3 2 2
166 2 6 1
167 1 1 1
168 1 3 1
169 2 3 3
170 2 4 3
171 2 5 4
172 1 1 2
173 2 2 3
174 2 2 3
175 2 2 3
176 1 1 3
177 2 2 1
178 1 1 1
179 2 3 3
180 1 1 1
181 3 6 2
182 1 3 1
183 1 3 1
184 1 3 2
185 3 3 1
186 1 1 1
187 2 3 1
188 2 1 3
189 1 1 1
190 2 4 3
191 2 2 2
192 1 1 1
193 1 3 1
194 2 2 3
195 2 2 3
196 1 1 1
197 1 1 2
198 2 3 1
199 1 1 1
200 1 3 1
201 2 2 3
202 3 6 2
203 1 1 1
204 2 6 3
205 1 3 1
206 2 3 3
207 1 2 3
208 2 1 1
209 2 4 1
210 2 4 3
211 1 1 1
212 1 1 1
213 1 2 1
214 2 1 1
215 2 5 3
216 1 1 1
217 2 3 4
218 2 5 1
219 1 1 1
220 1 3 1
221 1 3 1
222 2 2 3
223 2 2 2
224 1 1 1
225 2 3 3
226 1 1 1
227 2 4 3
228 2 2 3
229 1 1 1
230 2 3 3
231 1 1 1
232 2 3 3
233 2 6 3
234 1 1 1
235 2 2 3
236 1 1 1
237 1 5 1
238 1 3 1
239 2 6 3
240 1 1 1
241 2 2 2
242 3 3 2
243 2 3 3
244 1 1 1
245 2 1 2
246 6 3 2
247 2 3 1
248 1 3 1
249 2 4 2
250 2 3 1
251 1 1 1
252 1 3 1
253 2 6 1
254 2 4 3
255 1 3 1
256 1 4 1
257 1 1 1
258 1 1 1
259 1 3 2
260 2 4 3
261 1 3 1
262 2 2 3
263 3 6 3
264 3 3 2
265 2 1 3
266 1 1 2
267 1 6 1
268 2 2 1
269 2 5 3
270 1 3 2
271 2 6 1
272 1 1 1
273 2 4 2
274 2 2 3
275 3 3 2
276 1 3 1
277 2 6 1
278 1 3 1
279 2 5 1
280 2 1 2
281 1 1 2
282 1 1 1
283 1 3 1
284 1 1 2
285 1 3 1
286 2 4 3
287 2 5 3
288 2 5 3
289 3 3 1
290 1 3 1
291 1 4 1
292 2 6 2
293 2 2 4
294 2 1 3
295 2 1 3
296 2 5 3
297 1 5 1
298 2 2 2
299 2 3 1
300 2 5 3
301 1 1 1
302 1 3 1
303 1 2 3
304 3 3 1
305 1 1 1
306 2 2 2
307 2 3 3
308 2 5 1
309 1 1 1
310 1 2 2
311 2 2 1
312 2 4 3
313 2 2 3
314 3 6 1
315 1 1 1
316 2 1 3
317 1 3 1
318 1 1 1
319 2 1 3
320 1 3 1
321 1 1 1
322 2 6 1
323 2 2 1
324 2 3 1
325 2 5 5
326 1 1 1
327 2 4 3
328 3 3 2
329 1 3 3
330 2 1 3
331 2 4 4
332 1 1 1
333 1 1 1
334 1 1 2
335 2 2 1
336 1 1 1
337 1 1 1
338 2 2 1
339 2 4 3
340 1 3 1
341 2 3 2
342 5 3 4
343 1 1 1
344 2 4 3
345 3 3 2
346 2 5 3
347 2 5 1
348 1 1 1
349 1 1 1
350 1 3 1
351 1 1 2
352 2 6 3
353 3 3 2
354 2 6 1
355 1 2 1
356 1 1 2
357 1 2 3
358 2 5 1
359 1 1 1
360 2 1 1
361 2 4 2
362 5 3 1
363 2 2 2
364 1 1 3
365 2 4 2
366 2 5 2
367 2 1 2
368 1 6 2
369 2 2 1
370 1 3 1
371 2 3 3
372 2 2 3
373 1 5 3
374 1 3 1
375 3 5 2
376 1 1 1
377 1 3 1
378 1 1 1
379 1 3 1
380 3 6 3
381 1 3 1
382 1 3 1
383 3 3 3
384 1 1 1
385 1 1 1
386 1 3 3
387 2 2 1
388 1 3 1
389 1 1 1
390 3 4 1
391 1 2 1
392 5 3 3
393 2 4 3
394 2 3 2
395 2 5 3
396 2 4 3
397 2 2 3
398 3 3 2
399 1 1 1
400 1 1 1
401 1 1 1
402 1 3 1
403 1 3 2
404 1 3 2
405 2 5 1
406 3 3 1
407 3 3 1
408 1 3 1
409 1 1 1
410 2 3 3
411 2 4 3
412 1 1 1
413 2 3 2
414 1 5 1
415 1 1 1
416 2 2 3
417 3 5 3
418 3 2 4
419 2 5 1
420 1 1 1
421 2 2 2
422 2 3 1
423 1 1 2
424 2 6 2
425 2 3 2
426 1 1 2
427 2 4 1
428 1 1 1
429 2 1 3
430 2 2 3
431 3 3 2
432 2 2 1
433 1 3 1
434 2 2 2
435 1 1 1
436 3 3 3
437 1 3 2
438 1 6 3
439 3 3 2
440 1 1 1
441 2 2 3
442 1 3 3
443 1 3 1
444 1 1 1
445 2 3 4
446 2 2 1
447 1 3 2
448 3 3 3
449 2 2 1
450 3 3 1
451 2 2 3
452 1 2 1
453 1 3 1
454 2 2 3
455 2 2 4
456 3 3 4
457 2 5 1
458 1 3 4
459 2 6 3
460 1 1 1
461 1 2 1
462 2 4 2
463 2 6 4
464 2 4 3
465 1 3 1
466 2 1 3
467 2 6 1
468 1 3 1
469 1 1 1
470 1 1 3
471 2 3 3
472 1 1 1
473 1 1 1
474 1 1 1
475 2 1 3
476 1 1 1
477 1 3 1
478 1 1 1
479 3 4 3
480 1 1 1
481 1 1 1
482 1 2 1
483 1 1 1
484 2 2 2
485 2 2 2
486 2 2 3
487 2 2 2
488 2 5 3
489 2 1 3
490 1 3 3
491 2 5 3
492 1 3 2
493 2 5 3
494 3 3 2
495 2 4 3
496 1 1 1
497 1 4 1
498 1 2 1
499 2 5 4
500 2 5 3
501 1 1 2
502 2 2 1
503 2 2 3
504 2 2 3
505 2 3 2
506 1 1 1
507 1 1 1
508 3 4 1
509 2 2 1
510 2 5 1
511 1 3 1
512 2 5 3
513 1 2 3
514 3 3 4
515 2 1 1
516 2 3 3
517 1 3 1
518 1 1 1
519 3 3 1
520 3 3 3
521 2 4 1
522 2 2 1
523 1 1 1
524 1 1 1
525 1 3 1
526 2 4 3
527 1 1 1
528 3 2 1
529 1 4 3
530 2 1 1
531 3 3 1
532 5 3 1
533 5 3 4
534 1 3 1
535 2 1 3
536 1 3 1
537 2 5 3
538 2 5 3
539 1 1 1
540 1 1 1
541 1 3 1
542 1 6 1
543 1 1 1
544 1 1 1
545 1 1 2
546 3 4 1
547 2 2 1
548 2 6 3
549 1 3 1
550 2 2 3
551 2 4 2
552 1 3 1
553 2 3 1
554 1 1 1
555 3 3 3
556 2 4 3
557 2 4 2
558 1 1 1
559 1 3 1
560 2 2 3
561 2 2 2
562 2 5 1
563 3 3 1
564 1 1 1
565 1 3 3
566 1 3 1
567 1 1 1
568 2 1 1
569 2 1 3
570 2 6 3
571 2 5 1
572 1 3 2
573 1 3 1
574 2 2 3
575 2 4 3
576 3 3 2
577 2 6 1
578 1 3 1
579 3 3 1
580 2 5 1
581 1 3 1
582 2 1 3
583 3 3 2
584 3 3 1
585 1 1 1
586 3 4 1
587 2 2 3
588 2 1 1
589 3 3 3
590 1 1 1
591 1 1 1
592 1 1 1
593 1 1 1
594 2 3 3
595 1 1 1
596 2 3 1
597 1 3 1
598 2 2 1
599 2 6 3
600 1 1 1
601 1 3 3
602 1 1 1
603 1 3 1
604 1 1 1
605 2 5 3
606 1 1 2
607 2 1 1
608 1 5 3
609 2 4 2
610 1 1 1
611 2 2 3
612 1 1 1
613 1 2 1
614 2 6 1
615 1 1 1
616 1 1 1
617 2 3 2
618 1 1 1
619 1 1 1
620 3 5 3
621 2 2 3
622 2 5 3
623 1 1 1
624 2 4 2
625 2 3 3
626 2 3 3
627 2 5 1
628 1 4 3
629 2 1 3
630 3 3 3
631 1 1 1
632 2 2 3
633 2 5 3
634 2 1 2
635 2 6 1
636 3 3 4
637 2 3 3
638 2 3 2
639 1 1 1
640 1 1 1
641 1 1 1
642 2 1 2
643 2 4 1
644 1 1 1
645 2 2 3
646 2 3 2
647 2 4 1
648 2 3 3
649 2 5 2
650 1 3 1
651 2 3 3
652 1 3 1
653 2 3 1
654 1 1 1
655 2 3 3
656 2 2 1
657 3 3 3
658 2 3 3
659 1 5 1
660 2 3 3
661 1 1 1
662 2 3 2
663 2 5 3
664 1 1 1
665 1 3 1
666 2 6 3
667 1 1 1
668 2 4 2
669 3 3 1
670 1 1 1
671 3 5 3
672 2 2 3
673 1 1 1
674 2 4 3
675 2 2 3
676 3 3 4
677 3 3 1
678 1 3 1
679 2 4 3
680 2 5 3
681 3 4 1
682 1 1 1
683 1 1 1
684 1 3 2
685 2 1 2
686 1 3 1
687 1 1 1
688 1 1 1
689 1 2 1
690 1 1 1
691 1 1 1
692 1 1 1
693 3 3 1
694 3 3 3
695 2 1 2
696 2 6 2
697 1 6 4
698 2 1 1
699 1 1 1
700 1 1 1
701 2 3 3
702 1 3 1
703 3 3 2
704 1 1 1
705 5 3 4
706 2 1 3
707 1 1 1
708 1 3 1
709 1 3 3
710 2 3 2
711 6 3 2
712 1 1 2
713 2 2 3
714 2 1 1
715 2 3 3
716 3 3 1
717 2 5 2
718 1 1 1
719 2 2 3
720 1 1 3
721 1 1 1
722 2 2 1
723 1 3 1
724 1 1 1
725 2 2 3
726 2 1 1
727 1 1 1
728 2 3 3
729 2 5 3
730 1 1 1
731 1 1 1
732 1 2 1
733 2 5 1
734 1 3 3
735 3 4 4
736 2 3 2
737 1 3 1
738 2 1 3
739 1 3 1
740 3 3 1
741 2 1 2
742 1 3 1
743 2 2 1
744 1 1 2
745 2 1 1
746 1 1 1
747 6 3 3
748 1 1 1
749 5 3 3
750 3 3 2
751 1 3 1
752 1 1 1
753 1 3 2
754 2 1 2
755 1 1 2
756 1 1 1
757 2 4 3
758 1 2 2
759 1 6 1
760 2 4 1
761 1 6 3
762 1 1 1
763 1 1 1
764 1 6 1
765 1 3 1
766 2 1 3
767 2 2 1
768 1 1 1
769 1 2 1
770 2 1 3
771 2 6 3
772 2 2 3
773 2 4 1
774 1 3 2
775 1 1 1
776 1 1 1
777 2 2 3
778 5 3 3
779 2 1 3
780 1 3 2
781 1 2 1
782 1 3 1
783 1 1 1
784 2 3 1
785 2 4 3
786 1 6 3
787 1 1 1
788 2 3 1
789 2 6 2
790 1 1 1
791 1 2 3
792 1 1 1
793 1 3 1
794 1 3 1
795 2 3 3
796 2 3 1
797 2 5 2
798 1 3 1
799 3 2 1
800 1 1 2
801 2 6 1
802 1 1 1
803 1 3 1
804 2 6 3
805 1 3 1
806 1 2 2
807 2 1 1
808 2 2 3
809 1 3 3
810 2 2 4
811 1 1 1
812 2 3 3
813 2 4 2
814 3 3 2
815 1 3 1
816 2 5 2
817 1 1 1
818 2 1 2
819 1 1 1
820 1 3 1
821 2 3 3
822 1 3 1
823 3 3 3
824 1 3 1
825 1 1 1
826 3 2 2
827 1 1 1
828 1 6 3
829 1 3 1
830 2 4 3
831 1 3 1
832 1 1 1
833 2 4 3
834 2 4 2
835 2 5 3
836 3 4 2
837 2 5 3
838 1 3 1
839 2 5 1
840 3 5 3
841 1 3 2
842 2 4 3
843 2 3 1
844 3 3 2
845 2 2 3
846 1 2 1
847 2 5 1
848 3 3 3
849 2 3 1
850 1 1 1
851 1 3 1
852 2 4 2
853 2 4 3
854 2 2 3
855 2 2 1
856 2 1 3
857 1 3 2
858 2 3 3
859 2 5 3
860 3 3 2
861 1 5 1
862 1 1 3
863 2 5 3
864 1 3 1
865 2 5 3
866 1 3 1
867 2 2 3
868 5 3 2
869 1 1 1
870 2 3 2
871 1 1 1
872 6 3 2
873 2 4 2
874 2 3 1
875 1 1 1
876 1 2 2
877 1 1 1
878 2 2 3
879 2 2 1
880 1 2 2
881 2 6 1
882 1 3 2
883 1 3 3
884 2 5 3
885 1 1 1
886 2 6 1
887 1 3 3
888 1 2 1
889 2 5 1
890 2 3 1
891 2 6 2
892 2 1 3
893 1 5 3
894 5 3 3
895 2 5 3
896 3 2 1
897 5 3 3
898 1 1 1
899 2 5 3
900 2 2 3
901 2 5 3
902 1 3 1
903 1 3 1
904 2 1 3
905 2 4 3
906 1 1 1
907 2 4 2
908 2 5 3
909 1 3 1
910 2 4 3
911 2 2 3
912 3 4 3
913 2 5 3
914 1 1 1
915 2 3 4
916 2 5 3
917 2 2 1
918 1 5 1
919 2 2 1
920 1 1 1
921 2 2 2
922 1 1 1
923 1 6 1
924 2 5 3
925 1 2 3
926 1 3 1
927 2 5 2
928 1 3 1
929 1 3 2
930 1 1 1
931 2 5 1
932 1 3 1
933 1 1 1
934 2 1 1
935 1 3 2
936 1 1 1
937 1 1 1
938 1 1 1
939 2 3 1
940 2 4 3
941 1 2 1
942 1 1 3
943 2 3 3
944 2 1 2
945 3 6 3
946 2 2 3
947 1 1 1
948 1 3 1
949 2 2 3
950 2 2 2
951 2 5 2
952 2 1 2
953 2 2 1
954 2 1 3
955 2 4 3
956 2 2 3
957 2 4 3
958 1 1 1
959 1 2 1
960 2 5 2
961 2 2 3
962 2 2 3
963 1 3 1
964 1 1 1
965 1 3 1
966 3 4 1
967 2 2 3
968 3 3 4
969 2 2 3
970 2 3 3
971 1 1 1
972 2 3 3
973 1 3 1
974 3 3 3
975 1 1 1
976 1 3 3
977 1 1 1
978 2 6 1
979 1 4 3
980 2 2 2
981 1 1 1
982 1 3 1
983 1 3 1
984 2 3 3
985 2 2 3
986 2 3 2
987 1 1 1
988 2 6 3
989 1 3 1
990 1 1 1
991 3 5 2
992 2 5 2
993 1 3 1
994 1 1 1
995 2 2 3
996 2 4 3
997 2 3 3
998 3 3 4
999 1 1 1
1000 5 3 4
1001 3 3 1
1002 1 1 1
1003 2 3 4
1004 1 1 1
1005 2 1 3
1006 2 6 2
1007 2 5 3
1008 1 3 1
1009 3 3 1
1010 2 3 3
1011 2 3 3
1012 1 3 3
1013 3 2 3
1014 2 5 3
1015 1 1 1
1016 1 1 1
1017 2 1 2
1018 1 3 2
1019 1 1 1
1020 1 1 1
1021 1 1 1
1022 2 6 2
1023 1 1 1
1024 2 2 3
1025 2 6 3
1026 2 4 3
1027 1 1 1
1028 2 4 3
1029 2 5 2
1030 1 3 2
1031 3 1 1
1032 1 1 1
1033 1 1 1
1034 1 3 1
1035 5 3 1
1036 1 1 1
1037 1 3 1
1038 2 3 3
1039 2 1 3
1040 2 4 1
1041 2 1 2
1042 1 2 1
1043 1 1 1
1044 1 2 3
1045 5 3 1
1046 1 1 1
1047 1 1 1
1048 5 3 3
1049 2 3 1
1050 1 3 1
1051 1 3 1
1052 2 4 3
1053 2 4 1
1054 1 1 1
1055 2 2 3
1056 1 1 1
1057 1 1 1
1058 1 1 1
1059 2 4 3
1060 1 1 1
1061 2 3 3
1062 3 3 3
1063 3 5 2
1064 2 5 1
1065 1 1 1
1066 1 3 2
1067 2 1 3
1068 2 2 2
1069 2 2 3
1070 2 5 3
1071 2 4 1
1072 2 3 4
1073 2 5 3
1074 1 3 1
1075 2 5 3
1076 2 2 2
1077 2 5 1
1078 1 1 1
1079 1 1 2
1080 2 2 1
1081 2 2 4
1082 1 3 1
1083 2 5 3
1084 1 2 2
1085 1 1 1
1086 2 5 3
1087 1 3 1
1088 1 3 1
1089 1 1 1
1090 5 3 4
1091 1 5 1
1092 1 4 4
1093 2 1 2
1094 2 5 1
1095 1 1 2
1096 1 3 3
1097 2 3 1
1098 3 5 3
1099 2 2 3
1100 2 4 3
1101 2 5 3
1102 2 4 1
1103 2 2 1
1104 2 3 3
1105 1 1 1
1106 1 1 2
1107 1 3 1
1108 1 3 1
1109 1 1 1
1110 1 1 2
1111 2 2 3
1112 2 1 3
1113 2 5 2
1114 2 4 1
1115 1 1 1
1116 1 1 1
1117 2 5 1
1118 2 3 3
1119 2 6 3
1120 3 3 3
1121 1 1 1
1122 2 3 1
1123 2 3 1
1124 1 3 2
1125 2 3 3
1126 1 3 1
1127 1 1 1
1128 1 3 1
1129 2 1 3
1130 3 5 3
1131 1 5 3
1132 3 3 3
1133 1 1 1
1134 1 3 2
1135 3 3 3
1136 2 5 3
1137 2 3 2
1138 1 2 3
1139 3 5 3
1140 2 2 3
1141 2 2 1
1142 1 1 1
1143 2 1 3
1144 3 5 4
1145 3 3 1
1146 2 1 3
1147 2 4 1
1148 1 3 3
1149 1 2 1
1150 2 3 1
1151 2 5 3
1152 1 2 1
1153 3 2 1
1154 2 2 3
1155 2 2 3
1156 1 2 2
1157 1 1 1
1158 1 3 1
1159 2 2 3
1160 2 2 4
1161 2 5 1
1162 2 5 2
1163 2 1 3
1164 1 2 1
1165 1 3 1
1166 1 3 1
1167 1 1 1
1168 2 4 1
1169 1 1 1
1170 2 2 3
1171 2 2 1
1172 1 3 1
1173 2 4 3
1174 3 4 3
1175 1 1 1
1176 2 2 3
1177 2 4 3
1178 3 3 1
1179 5 3 2
1180 4 6 1
1181 1 1 1
1182 1 1 1
1183 3 4 1
1184 2 4 2
1185 3 5 2
1186 2 5 3
1187 1 1 1
1188 1 3 1
1189 1 3 2
1190 2 5 1
1191 1 3 1
1192 1 3 2
1193 1 1 1
1194 2 2 3
1195 1 3 1
1196 1 3 1
1197 3 3 1
1198 1 3 1
1199 2 4 2
1200 2 3 2
1201 1 3 1
1202 3 3 1
1203 1 3 1
1204 2 2 1
1205 2 2 2
1206 2 4 3
1207 1 1 1
1208 2 4 3
1209 1 1 1
1210 1 3 1
1211 4 1 2
1212 2 4 3
1213 2 1 1
1214 2 5 3
1215 2 4 3
1216 5 3 3
1217 1 1 1
1218 5 3 2
1219 2 3 3
1220 2 4 3
1221 2 5 1
1222 2 4 1
1223 2 6 3
1224 1 1 1
1225 2 1 1
1226 1 3 1
1227 2 2 1
1228 1 1 1
1229 2 2 2
1230 2 1 1
1231 2 2 3
1232 5 3 3
1233 2 4 4
1234 1 3 3
1235 3 3 2
1236 1 3 1
1237 1 3 1
1238 1 3 1
1239 1 1 1
1240 1 1 1
1241 1 3 1
1242 2 1 3
1243 1 1 1
1244 1 3 4
1245 2 3 1
1246 1 3 1
1247 2 1 3
1248 3 4 4
1249 2 5 3
1250 2 2 1
1251 1 3 1
1252 2 5 3
1253 2 6 3
1254 1 3 1
1255 3 6 3
1256 1 1 1
1257 1 3 1
1258 1 1 1
1259 2 2 2
1260 1 3 1
1261 2 4 2
1262 2 3 1
1263 1 3 1
1264 1 1 1
1265 1 1 1
1266 2 3 3
1267 1 3 1
1268 2 4 1
1269 2 5 3
1270 1 1 3
1271 2 3 2
1272 2 5 3
1273 2 2 2
1274 1 3 1
1275 2 4 2
1276 2 5 3
1277 2 2 4
1278 1 1 1
1279 2 3 3
1280 1 1 1
1281 2 2 3
1282 2 6 3
1283 2 3 3
1284 1 3 1
1285 2 3 1
1286 2 2 3
1287 2 4 4
1288 1 1 1
1289 1 3 1
1290 1 1 3
1291 2 2 3
1292 6 3 3
1293 1 1 3
1294 2 4 3
1295 2 5 2
1296 2 2 2
1297 1 1 2
1298 1 1 1
1299 2 6 1
1300 1 1 1
1301 2 5 4
1302 1 1 1
1303 1 3 1
1304 1 3 1
1305 1 1 1
1306 1 3 1
1307 1 2 1
1308 2 1 1
1309 2 1 2
1310 1 1 3
1311 1 1 1
1312 1 1 1
1313 1 3 1
1314 2 4 1
1315 2 2 3
1316 1 3 1
1317 1 3 1
1318 1 3 1
1319 1 2 1
1320 2 2 2
1321 2 3 3
1322 1 1 1
1323 2 6 2
1324 1 3 1
1325 3 3 4
1326 3 4 1
1327 2 2 1
1328 3 6 1
1329 1 3 2
1330 1 3 1
1331 2 2 2
1332 2 2 1
1333 2 3 1
1334 2 6 3
1335 2 2 3
1336 2 3 3
1337 2 3 4
1338 1 1 1
1339 2 4 1
1340 2 3 3
1341 1 1 1
1342 1 3 1
1343 3 3 2
1344 1 3 1
1345 1 2 2
1346 2 5 3
1347 1 1 1
1348 1 1 1
1349 3 6 4
1350 2 6 3
1351 2 4 1
1352 3 3 3
1353 1 1 1
1354 1 1 1
1355 2 3 2
1356 2 4 3
1357 2 1 1
1358 1 1 1
1359 1 1 1
1360 3 3 1
1361 1 2 1
1362 3 3 2
1363 1 3 1
1364 1 3 1
1365 1 1 1
1366 1 1 1
1367 1 2 1
1368 1 1 1
1369 1 5 1
1370 1 2 1
1371 2 5 2
1372 1 1 1
1373 1 1 1
1374 1 3 1
1375 1 3 1
1376 3 4 3
1377 2 6 1
1378 2 5 3
1379 1 3 2
1380 3 4 3
1381 2 5 1
1382 3 3 1
1383 3 3 1
1384 3 4 3
1385 3 5 2
1386 2 5 3
1387 3 4 3
1388 1 1 1
1389 3 2 1
1390 1 1 1
1391 2 3 3
1392 2 2 3
1393 3 3 1
1394 1 1 1
1395 1 3 1
1396 2 5 3
1397 3 3 1
1398 2 4 2
1399 3 5 1
1400 3 3 3
1401 1 1 1
1402 1 3 1
1403 1 1 1
1404 1 3 3
1405 1 1 1
1406 2 1 3
1407 2 2 1
1408 2 3 2
1409 2 3 2
1410 1 1 1
1411 2 5 1
1412 5 3 4
1413 1 1 1
1414 3 3 2
1415 1 2 1
1416 1 3 2
1417 1 1 1
1418 2 5 3
1419 2 4 2
1420 2 2 2
1421 2 2 3
1422 1 1 1
1423 2 3 3
1424 2 2 3
1425 2 3 1
1426 1 1 1
1427 2 5 3
1428 2 5 3
1429 2 5 1
1430 1 3 1
1431 2 6 3
1432 3 3 3
1433 1 3 1
1434 2 2 4
1435 2 3 3
1436 1 2 3
1437 1 1 1
1438 2 1 3
1439 2 1 3
1440 3 3 4
1441 1 1 1
1442 1 1 1
1443 3 3 4
1444 1 3 1
1445 2 6 2
1446 2 4 3
1447 1 1 1
1448 2 3 2
1449 2 1 1
1450 2 3 3
1451 1 3 1
1452 1 1 2
1453 1 3 1
1454 1 1 1
1455 1 3 1
1456 2 2 3
1457 6 1 1
1458 2 1 2
1459 2 5 2
In [61]:
skewValue = df_c_high.skew(axis=0, numeric_only=True)
print(skewValue)
sns.pairplot(df_c_high)
Id              0.000000
Foundation      1.686723
BsmtFinType1    0.518622
HeatingQC       0.540458
SalePrice       1.882876
dtype: float64
Out[61]:
<seaborn.axisgrid.PairGrid at 0x7fcabc3240d0>

Merging The Dataframe¶

In [62]:
df_merge = pd.merge(df2_n, df_c_high, how='outer', on = 'Id')
df_merge.head()
Out[62]:
Id LotFrontage OverallQual YearBuilt YearRemodAdd MasVnrArea BsmtFinSF1 TotalBsmtSF 1stFlrSF 2ndFlrSF GrLivArea FullBath TotRmsAbvGrd Fireplaces GarageYrBlt GarageCars GarageArea WoodDeckSF OpenPorchSF SalePrice_x Foundation BsmtFinType1 HeatingQC SalePrice_y
0 1 65.0 7 2003 2003 196.0 706 856 856 854 1710 2 8 0 2003.0 2 548 0 61 208500 1 1 1 208500
1 2 80.0 6 1976 1976 0.0 978 1262 1262 0 1262 2 6 1 1976.0 2 460 298 0 181500 2 2 1 181500
2 3 68.0 7 2001 2002 162.0 486 920 920 866 1786 2 6 1 2001.0 2 608 0 42 223500 1 1 1 223500
3 4 60.0 7 1915 1970 0.0 216 756 961 756 1717 1 7 1 1998.0 3 642 0 35 140000 3 2 2 140000
4 5 84.0 8 2000 2000 350.0 655 1145 1145 1053 2198 2 9 1 2000.0 3 836 192 84 250000 1 1 1 250000
In [63]:
df_merge.shape
Out[63]:
(1460, 24)
In [64]:
df_merge.corr()
Out[64]:
Id LotFrontage OverallQual YearBuilt YearRemodAdd MasVnrArea BsmtFinSF1 TotalBsmtSF 1stFlrSF 2ndFlrSF GrLivArea FullBath TotRmsAbvGrd Fireplaces GarageYrBlt GarageCars GarageArea WoodDeckSF OpenPorchSF SalePrice_x Foundation BsmtFinType1 HeatingQC SalePrice_y
Id 1.000000 -0.028557 -0.029109 -0.012084 -0.021998 -0.041853 -0.013375 -0.023009 0.005143 0.005193 0.002683 0.005587 0.021054 -0.018479 -0.000081 0.013672 0.013392 -0.033566 -0.011482 -0.027439 0.020023 0.012783 0.015001 -0.021917
LotFrontage -0.028557 1.000000 0.243394 0.141219 0.076761 0.212129 0.146139 0.336165 0.392400 0.069453 0.341649 0.192218 0.336975 0.227339 0.078353 0.311437 0.345168 0.104215 0.147604 0.376300 -0.097487 -0.053252 -0.081754 0.371558
OverallQual -0.029109 0.243394 1.000000 0.575160 0.550829 0.418265 0.229718 0.541432 0.473935 0.294860 0.597218 0.550501 0.435768 0.396809 0.519893 0.609159 0.568185 0.246423 0.358426 0.817680 -0.477282 -0.375150 -0.457410 0.791965
YearBuilt -0.012084 0.141219 0.575160 1.000000 0.594909 0.350369 0.252814 0.410450 0.288419 0.011488 0.214644 0.469824 0.106356 0.148330 0.782454 0.546705 0.487446 0.238377 0.262130 0.570327 -0.648157 -0.448995 -0.449252 0.524172
YearRemodAdd -0.021998 0.076761 0.550829 0.594909 1.000000 0.188731 0.126337 0.301581 0.246662 0.140013 0.296849 0.439046 0.196675 0.110941 0.618391 0.425546 0.377335 0.222702 0.280916 0.552061 -0.477886 -0.399609 -0.550017 0.507101
MasVnrArea -0.041853 0.212129 0.418265 0.350369 0.188731 1.000000 0.250482 0.358988 0.339379 0.143741 0.363829 0.276848 0.281925 0.252411 0.269059 0.388521 0.386561 0.161049 0.176371 0.454043 -0.222327 -0.184680 -0.160608 0.452127
BsmtFinSF1 -0.013375 0.146139 0.229718 0.252814 0.126337 0.250482 1.000000 0.467348 0.395381 -0.157621 0.138386 0.052875 0.017356 0.243934 0.148649 0.231241 0.278344 0.209158 0.088825 0.387583 -0.204704 -0.328471 -0.082747 0.400319
TotalBsmtSF -0.023009 0.336165 0.541432 0.410450 0.301581 0.358988 0.467348 1.000000 0.807158 -0.206180 0.404359 0.331598 0.271000 0.328147 0.322837 0.461365 0.485658 0.241778 0.249657 0.645250 -0.420562 -0.222359 -0.274791 0.636999
1stFlrSF 0.005143 0.392400 0.473935 0.288419 0.246662 0.339379 0.395381 0.807158 1.000000 -0.227040 0.526708 0.383459 0.401270 0.404117 0.230210 0.459572 0.487425 0.238082 0.204385 0.621874 -0.189160 -0.181097 -0.195179 0.620740
2ndFlrSF 0.005193 0.069453 0.294860 0.011488 0.140013 0.143741 -0.157621 -0.206180 -0.227040 1.000000 0.691087 0.420874 0.610949 0.195192 0.069278 0.183574 0.141130 0.089820 0.219093 0.316508 -0.071301 -0.019549 -0.141724 0.316547
GrLivArea 0.002683 0.341649 0.597218 0.214644 0.296849 0.363829 0.138386 0.404359 0.526708 0.691087 1.000000 0.641858 0.835194 0.462469 0.227572 0.487803 0.471376 0.243034 0.347630 0.729315 -0.198735 -0.144240 -0.260799 0.708136
FullBath 0.005587 0.192218 0.550501 0.469824 0.439046 0.276848 0.052875 0.331598 0.383459 0.420874 0.641858 1.000000 0.556469 0.245253 0.468694 0.476141 0.414915 0.200786 0.299482 0.583994 -0.318128 -0.271030 -0.333499 0.560664
TotRmsAbvGrd 0.021054 0.336975 0.435768 0.106356 0.196675 0.281925 0.017356 0.271000 0.401270 0.610949 0.835194 0.556469 1.000000 0.329225 0.142458 0.372320 0.339538 0.166798 0.252140 0.539339 -0.101301 -0.061963 -0.170769 0.536067
Fireplaces -0.018479 0.227339 0.396809 0.148330 0.110941 0.252411 0.243934 0.328147 0.404117 0.195192 0.462469 0.245253 0.329225 1.000000 0.045929 0.308721 0.268397 0.201902 0.174771 0.488848 -0.124461 -0.080684 -0.130323 0.469543
GarageYrBlt -0.000081 0.078353 0.519893 0.782454 0.618391 0.269059 0.148649 0.322837 0.230210 0.069278 0.227572 0.468694 0.142458 0.045929 1.000000 0.489006 0.482098 0.236382 0.278921 0.503068 -0.566991 -0.435907 -0.457660 0.470269
GarageCars 0.013672 0.311437 0.609159 0.546705 0.425546 0.388521 0.231241 0.461365 0.459572 0.183574 0.487803 0.476141 0.372320 0.308721 0.489006 1.000000 0.895027 0.233857 0.265562 0.678508 -0.376198 -0.314252 -0.330534 0.646652
GarageArea 0.013392 0.345168 0.568185 0.487446 0.377335 0.386561 0.278344 0.485658 0.487425 0.141130 0.471376 0.414915 0.339538 0.268397 0.482098 0.895027 1.000000 0.232125 0.277130 0.660029 -0.335340 -0.289625 -0.298013 0.630135
WoodDeckSF -0.033566 0.104215 0.246423 0.238377 0.222702 0.161049 0.209158 0.241778 0.238082 0.089820 0.243034 0.200786 0.166798 0.201902 0.236382 0.233857 0.232125 1.000000 0.081119 0.343660 -0.192402 -0.172493 -0.141086 0.330378
OpenPorchSF -0.011482 0.147604 0.358426 0.262130 0.280916 0.176371 0.088825 0.249657 0.204385 0.219093 0.347630 0.299482 0.252140 0.174771 0.278921 0.265562 0.277130 0.081119 1.000000 0.394637 -0.253742 -0.166635 -0.229056 0.369024
SalePrice_x -0.027439 0.376300 0.817680 0.570327 0.552061 0.454043 0.387583 0.645250 0.621874 0.316508 0.729315 0.583994 0.539339 0.488848 0.503068 0.678508 0.660029 0.343660 0.394637 1.000000 -0.466808 -0.378836 -0.460160 0.955438
Foundation 0.020023 -0.097487 -0.477282 -0.648157 -0.477886 -0.222327 -0.204704 -0.420562 -0.189160 -0.071301 -0.198735 -0.318128 -0.101301 -0.124461 -0.566991 -0.376198 -0.335340 -0.192402 -0.253742 -0.466808 1.000000 0.329651 0.419451 -0.429678
BsmtFinType1 0.012783 -0.053252 -0.375150 -0.448995 -0.399609 -0.184680 -0.328471 -0.222359 -0.181097 -0.019549 -0.144240 -0.271030 -0.061963 -0.080684 -0.435907 -0.314252 -0.289625 -0.172493 -0.166635 -0.378836 0.329651 1.000000 0.303916 -0.360407
HeatingQC 0.015001 -0.081754 -0.457410 -0.449252 -0.550017 -0.160608 -0.082747 -0.274791 -0.195179 -0.141724 -0.260799 -0.333499 -0.170769 -0.130323 -0.457660 -0.330534 -0.298013 -0.141086 -0.229056 -0.460160 0.419451 0.303916 1.000000 -0.427649
SalePrice_y -0.021917 0.371558 0.791965 0.524172 0.507101 0.452127 0.400319 0.636999 0.620740 0.316547 0.708136 0.560664 0.536067 0.469543 0.470269 0.646652 0.630135 0.330378 0.369024 0.955438 -0.429678 -0.360407 -0.427649 1.000000
In [65]:
sns.set(rc={'figure.figsize':(30,30)})
color = plt.get_cmap('summer')
color.set_bad('lightblue')
sns.heatmap(data=df_merge.corr(), square=True, annot=True, fmt='.2g',  cmap= color)
Out[65]:
<AxesSubplot: >
In [66]:
df_final = df_merge
df_final.head()
Out[66]:
Id LotFrontage OverallQual YearBuilt YearRemodAdd MasVnrArea BsmtFinSF1 TotalBsmtSF 1stFlrSF 2ndFlrSF GrLivArea FullBath TotRmsAbvGrd Fireplaces GarageYrBlt GarageCars GarageArea WoodDeckSF OpenPorchSF SalePrice_x Foundation BsmtFinType1 HeatingQC SalePrice_y
0 1 65.0 7 2003 2003 196.0 706 856 856 854 1710 2 8 0 2003.0 2 548 0 61 208500 1 1 1 208500
1 2 80.0 6 1976 1976 0.0 978 1262 1262 0 1262 2 6 1 1976.0 2 460 298 0 181500 2 2 1 181500
2 3 68.0 7 2001 2002 162.0 486 920 920 866 1786 2 6 1 2001.0 2 608 0 42 223500 1 1 1 223500
3 4 60.0 7 1915 1970 0.0 216 756 961 756 1717 1 7 1 1998.0 3 642 0 35 140000 3 2 2 140000
4 5 84.0 8 2000 2000 350.0 655 1145 1145 1053 2198 2 9 1 2000.0 3 836 192 84 250000 1 1 1 250000
In [ ]: